Skip to content

Commit

Permalink
Test
Browse files Browse the repository at this point in the history
Signed-off-by: Janusz Lisiecki <jlisiecki@nvidia.com>
  • Loading branch information
JanuszL committed Sep 9, 2024
1 parent d886ad6 commit 70b57be
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class VideoSample {
public:
Tensor<Backend> data_;
int label_;
int first_frame_;
};

class VideoLoaderDecoderBase {
Expand All @@ -46,6 +47,7 @@ class VideoLoaderDecoderBase {
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
has_frame_no_ = spec.GetArgument<bool>("enable_frame_num");
DALI_ENFORCE(
!has_labels_ || labels_.size() == filenames_.size(),
make_string(
Expand All @@ -61,6 +63,7 @@ class VideoLoaderDecoderBase {
std::vector<std::string> filenames_;
std::vector<int> labels_;
bool has_labels_ = false;
bool has_frame_no_ = false;

Index current_index_ = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ void VideoLoaderDecoderCpu::ReadSample(VideoSample<CPUBackend> &sample) {
if (has_labels_) {
sample.label_ = labels_[sample_span.video_idx_];
}
if (has_frame_no_) {
sample.first_frame_ = sample_span.start_;
}
}

Index VideoLoaderDecoderCpu::SizeImpl() {
Expand Down
23 changes: 20 additions & 3 deletions dali/operators/reader/video_reader_decoder_cpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

// Constructs the CPU video reader operator from `spec`.
// Forwards `spec` to the DataReader base, records whether a "labels" argument
// was supplied and the value of the boolean "enable_frame_num" argument, then
// creates the VideoLoaderDecoderCpu loader and calls SetInitialSnapshot().
// NOTE(review): this text comes from a rendered diff without +/- markers; the
// first `has_labels_(...) {` line below appears to be the pre-change line and
// the two lines after it its replacement - confirm against the repository.
VideoReaderDecoderCpu::VideoReaderDecoderCpu(const OpSpec &spec)
: DataReader<CPUBackend, VideoSampleCpu, VideoSampleCpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderCpu>(spec);
this->SetInitialSnapshot();
}
Expand All @@ -32,16 +33,28 @@ void VideoReaderDecoderCpu::RunImpl(SampleWorkspace &ws) {
video_output.Copy(sample.data_);
video_output.SetSourceInfo(sample.data_.GetSourceInfo());

int out_index = 1;
if (has_labels_) {
auto &label_output = ws.Output<CPUBackend>(1);
auto &label_output = ws.Output<CPUBackend>(out_index);
label_output.Resize({}, DALIDataType::DALI_INT32);
label_output.mutable_data<int>()[0] = sample.label_;
out_index++;
}
if (has_frame_no_) {
auto &frame_no_output = ws.Output<CPUBackend>(out_index);
frame_no_output.Resize({}, DALIDataType::DALI_INT32);
frame_no_output.mutable_data<int>()[0] = sample.first_frame_;
out_index++;
}
}

namespace detail {
// Computes the number of outputs of the video reader operator from `spec`:
// one output always, plus one when a "labels" argument is present, plus one
// when the boolean "enable_frame_num" argument is true.
inline int VideoReaderDecoderOutputFn(const OpSpec &spec) {
// NOTE(review): the next line looks like the pre-change line of the diff this
// text was taken from. Read literally, it returns before the counting logic
// below and "enable_frame_num" is never taken into account - confirm that it
// is absent from the committed file.
return spec.HasArgument("labels") ? 2 : 1;
int num_outputs = 1;
if (spec.HasArgument("labels")) num_outputs++;
bool enable_frame_num = spec.GetArgument<bool>("enable_frame_num");
if (enable_frame_num) num_outputs++;
return num_outputs;
}
} // namespace detail

Expand All @@ -68,6 +81,10 @@ even in the variable frame rate scenario.)code")
.AddArg("sequence_length",
R"code(Frames to load per sequence.)code",
DALI_INT32)
.AddOptionalArg("enable_frame_num",
R"code(If set, returns the first frame number in the decoded sequence
as a separate output.)code",
false)
.AddOptionalArg("step",
R"code(Frame interval between each sequence.
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_cpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class VideoReaderDecoderCpu

private:
bool has_labels_ = false;
bool has_frame_no_ = false;
};

} // namespace dali
Expand Down
66 changes: 45 additions & 21 deletions dali/operators/reader/video_reader_decoder_gpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

// Constructs the GPU video reader operator from `spec`.
// Forwards `spec` to the DataReader base, records whether a "labels" argument
// was supplied and the value of the boolean "enable_frame_num" argument, then
// creates the VideoLoaderDecoderGpu loader and calls SetInitialSnapshot().
// NOTE(review): this text comes from a rendered diff without +/- markers; the
// first `has_labels_(...) {` line below appears to be the pre-change line and
// the two lines after it its replacement - confirm against the repository.
VideoReaderDecoderGpu::VideoReaderDecoderGpu(const OpSpec &spec)
: DataReader<GPUBackend, VideoSampleGpu, VideoSampleGpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_no_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderGpu>(spec);
this->SetInitialSnapshot();
}
Expand Down Expand Up @@ -50,14 +51,21 @@ bool VideoReaderDecoderGpu::SetupImpl(

output_desc[0] = { video_shape, DALI_UINT8 };

if (!has_labels_) {
return true;
int out_index = 1;
if (has_labels_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}
if (has_frame_no_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}

output_desc[1] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};

return true;
}
Expand All @@ -80,23 +88,39 @@ void VideoReaderDecoderGpu::RunImpl(Workspace &ws) {
video_output.SetSourceInfo(sample_id, sample.data_.GetSourceInfo());
}

if (!has_labels_) {
return;
}
int out_index = 1;
if (has_labels_) {
auto &labels_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> labels_cpu;

auto &labels_output = ws.Output<GPUBackend>(1);
SmallVector<int, 32> labels_cpu;
for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
}

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
if (has_frame_no_) {
auto &frame_no_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> frame_no_output_cpu;

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
frame_no_output_cpu[sample_id] = sample.span_ ? sample.span_->start_ : -1;
}

MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
MemCopy(
frame_no_output.AsTensor().raw_mutable_data(),
frame_no_output_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
}

DALI_REGISTER_OPERATOR(experimental__readers__Video, VideoReaderDecoderGpu, GPU);
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_gpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class VideoReaderDecoderGpu : public DataReader<GPUBackend, VideoSampleGpu, Vide

private:
bool has_labels_ = false;
bool has_frame_no_ = false;
};

} // namespace dali
Expand Down
33 changes: 27 additions & 6 deletions dali/operators/reader/video_reader_decoder_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
virtual void AssertFrame(
int frame_id, const uint8_t *frame, TestVideo &ground_truth) = 0;

template<typename Backend>
int GetFrameNo(dali::TensorList<Backend> &device_frame_no);

private:
template<typename Backend>
void RunTestImpl(
Expand Down Expand Up @@ -129,16 +132,15 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
.AddArg("device", backend)
.AddArg("sequence_length", sequence_length)
.AddArg("random_shuffle", true)
.AddArg("enable_frame_num", true)
.AddArg("initial_fill", cfr_videos_[0].NumFrames())
.AddArg(
"filenames",
std::vector<std::string>{cfr_videos_paths_[0]})
.AddOutput("frames", backend));

pipe.Build({{"frames", backend}});
.AddOutput("frames", backend)
.AddOutput("frame_no", backend));

// ToFix
std::vector<int> expected_order = {29, 46, 33, 6, 37};
pipe.Build({{"frames", backend}, {"frame_no", backend}});

int num_sequences = 5;

Expand All @@ -149,9 +151,10 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {

auto &frame_video_output = ws.Output<Backend>(0);
const auto sample = frame_video_output.template tensor<uint8_t>(0);
int frame_no = GetFrameNo(ws.Output<Backend>(1));

// We want to access correct order, so we compare only the first frame of the sequence
AssertFrame(expected_order[sequence_id], sample, ground_truth_video);
AssertFrame(frame_no, sample, ground_truth_video);
}
}
};
Expand All @@ -169,6 +172,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::CPUBackend>() {
RunShuffleTestImpl<dali::CPUBackend>("cpu", dali::CPU_ONLY_DEVICE_ID);
}

// CPU specialization: returns the int stored at the start of sample 0 of
// `device_frame_no`. The data is read directly through the pointer returned
// by tensor<int>(0).
template<>
int VideoReaderDecoderBaseTest::GetFrameNo(
    dali::TensorList<dali::CPUBackend> &device_frame_no) {
  const auto first_sample = device_frame_no.template tensor<int>(0);
  return *first_sample;
}

template<>
void VideoReaderDecoderBaseTest::RunTest<dali::GPUBackend>(
std::vector<std::string> &videos_paths,
Expand All @@ -182,6 +194,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::GPUBackend>() {
RunShuffleTestImpl<dali::GPUBackend>("gpu", 0);
}

// GPU specialization: copies the int stored at the start of sample 0 of
// `device_frame_no` into a local variable with MemCopy and returns it.
// The local starts at -1, which is what is returned if the copy writes nothing.
// NOTE(review): MemCopy is called without a stream argument and no explicit
// synchronization is visible here before the value is returned - confirm the
// copy is guaranteed to have completed by then.
template<>
int VideoReaderDecoderBaseTest::GetFrameNo(
    dali::TensorList<dali::GPUBackend> &device_frame_no) {
  int host_frame_no = -1;
  const auto device_ptr = device_frame_no.template tensor<int>(0);
  MemCopy(&host_frame_no, device_ptr, sizeof(host_frame_no));
  return host_frame_no;
}

class VideoReaderDecoderCpuTest : public VideoReaderDecoderBaseTest {
public:
void AssertLabel(const int *label, int ground_truth_label) override {
Expand Down

0 comments on commit 70b57be

Please sign in to comment.