Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .as_cpu() to TensorCPU and TensorListCPU #5751

Merged
merged 5 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions dali/python/backend_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,11 @@ void ExposeTensor(py::module &m) {
Returns a `TensorGPU` object being a copy of this `TensorCPU`.
)code",
py::return_value_policy::take_ownership)
.def("as_cpu", [](Tensor<CPUBackend> &t) -> Tensor<CPUBackend>& {
return t;
},
R"code(Bypass, since the object is already an instance of `TensorCPU`.)code",
jantonguirao marked this conversation as resolved.
Show resolved Hide resolved
py::return_value_policy::reference_internal)
.def("copy_to_external",
[](Tensor<CPUBackend> &t, py::object p) {
CopyToExternal<mm::memory_kind::host>(ctypes_void_ptr(p), t, AccessOrder::host(), false);
Expand Down Expand Up @@ -1174,6 +1179,10 @@ void ExposeTensorList(py::module &m) {
Returns a `TensorListGPU` object being a copy of this `TensorListCPU`.
)code",
py::return_value_policy::take_ownership)
.def("as_cpu", [](TensorList<CPUBackend> &t) -> TensorList<CPUBackend> & {
return t;
}, R"code(Passthrough, as it is already an instance of `TensorListCPU`.)code",
py::return_value_policy::reference_internal)
.def("layout", [](TensorList<CPUBackend> &t) {
return t.GetLayout().str();
})
Expand Down
2 changes: 2 additions & 0 deletions dali/python/nvidia/dali/tensors.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class TensorCPU:
@overload
def __init__(self, b, layout: str = ..., is_pinned: bool = ...) -> None: ...
def _as_gpu(self, *args, **kwargs) -> Any: ...
def as_cpu(self) -> TensorCPU: ...

# def __dlpack__(self, stream: Optional[int] = None) -> capsule: ...

Expand Down Expand Up @@ -92,6 +93,7 @@ class TensorListCPU:
def __init__(self, list_of_tensors: list, layout: str = ...) -> None: ...

# def _as_gpu(self, *args, **kwargs) -> TensorListGPU: ...
def as_cpu(self) -> TensorListCPU: ...

# def as_array(self) -> numpy.ndarray: ...
def as_array(self) -> Any: ...
Expand Down
4 changes: 1 addition & 3 deletions dali/test/python/auto_aug/test_augmentations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from PIL import Image, ImageEnhance, ImageOps
from nose2.tools import params, cartesian_params

import nvidia.dali.tensors as _tensors
from nvidia.dali import fn, pipeline_def
from nvidia.dali.auto_aug import augmentations as a
from nvidia.dali.auto_aug.core._utils import get_translations as _get_translations
Expand Down Expand Up @@ -90,8 +89,7 @@ def pipeline():
if dev == "gpu":
output = output.as_cpu()
output = [np.array(sample) for sample in output]
if isinstance(data, _tensors.TensorListGPU):
data = data.as_cpu()
data = data.as_cpu()
data = [np.array(sample) for sample in data]

if modality == "image":
Expand Down
5 changes: 2 additions & 3 deletions dali/test/python/auto_aug/test_rand_augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from scipy.stats import chisquare
from nose2.tools import params

from nvidia.dali import fn, tensors, types
from nvidia.dali import fn, types
from nvidia.dali import pipeline_def
from nvidia.dali.auto_aug import rand_augment
from nvidia.dali.auto_aug.core import augmentation
Expand All @@ -43,8 +43,7 @@ def debug_discrepancy_helper(*batch_pairs):
"""

def as_array_list(batch):
    """Convert a DALI batch into a list of NumPy arrays, one per sample.

    ``as_cpu()`` is called exactly once and unconditionally. CPU tensor
    lists expose ``as_cpu()`` as a passthrough, so the former
    ``isinstance(batch, tensors.TensorListGPU)`` guard (and the ``tensors``
    import it relied on) is no longer needed.

    Args:
        batch: An object exposing ``as_cpu()`` whose result is iterable
            over samples convertible with ``np.array``.

    Returns:
        list: ``np.array(sample)`` for every sample in the host-side batch.
    """
    host_batch = batch.as_cpu()
    return [np.array(sample) for sample in host_batch]

batch_names = [name for _, _, name in batch_pairs]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import glob
import numpy as np
import itertools
import nvidia.dali as dali
from nvidia.dali import fn, pipeline_def, types
from test_utils import (
compare_pipelines,
Expand All @@ -41,9 +40,7 @@


def tensor_list_to_array(tensor_list):
    """Return ``as_array()`` of the host-side version of ``tensor_list``.

    ``as_cpu()`` is available on both CPU and GPU tensor lists (it is a
    passthrough on the CPU variant), so no backend type check is required
    and there is a single return path.

    Args:
        tensor_list: An object exposing ``as_cpu()``; the result must
            expose ``as_array()``.

    Returns:
        Whatever ``as_array()`` yields for the host-side tensor list.
    """
    return tensor_list.as_cpu().as_array()


# Check whether a given pipeline is stateless
Expand Down
7 changes: 2 additions & 5 deletions dali/test/python/decoder/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import os
from itertools import cycle
from test_utils import get_dali_extra_path, is_mulit_gpu, skip_if_m60
from nvidia.dali.backend import TensorListGPU
from nose2.tools import params
from nose_utils import SkipTest, attr, assert_raises

Expand Down Expand Up @@ -76,8 +75,7 @@ def video_decoder_iter(batch_size, epochs=1, device="cpu", module=fn.experimenta
)
for _ in range(int((epochs * len(files) + batch_size - 1) / batch_size)):
(output,) = pipe.run()
if isinstance(output, TensorListGPU):
output = output.as_cpu()
output = output.as_cpu()
for i in range(batch_size):
yield np.array(output[i])

Expand All @@ -87,8 +85,7 @@ def ref_iter(epochs=1, device="cpu"):
for filename in filenames:
pipe = reference_pipeline(filename, device=device)
(output,) = pipe.run()
if isinstance(output, TensorListGPU):
output = output.as_cpu()
output = output.as_cpu()
yield np.array(output[0])


Expand Down
15 changes: 4 additions & 11 deletions dali/test/python/operator_1/test_arithmetic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import nvidia.dali.math as math
from nvidia.dali.tensors import TensorListGPU
import numpy as np
from nose_utils import attr, raises, assert_raises, assert_equals
from nose2.tools import params
Expand Down Expand Up @@ -226,12 +225,6 @@ def default_range(*types):
]


def as_cpu(tl):
    """Return the host-side version of a tensor list.

    Delegates straight to ``tl.as_cpu()``. CPU tensor lists implement
    ``as_cpu()`` as a passthrough, so dispatching on ``TensorListGPU`` is
    redundant and the helper no longer needs that import.

    Args:
        tl: An object exposing ``as_cpu()``.

    Returns:
        The result of ``tl.as_cpu()``.
    """
    return tl.as_cpu()


def max_dtype(kind, left_dtype, right_dtype):
    """Build the NumPy dtype of ``kind`` sized to the larger of two dtypes.

    Args:
        kind: NumPy type-kind prefix string (e.g. ``"i"``, ``"u"``, ``"f"``).
        left_dtype: Object with an ``itemsize`` attribute (bytes).
        right_dtype: Object with an ``itemsize`` attribute (bytes).

    Returns:
        numpy.dtype: ``np.dtype(kind + str(n))`` where ``n`` is the larger
        of the two item sizes.
    """
    widest_itemsize = max(left_dtype.itemsize, right_dtype.itemsize)
    type_code = kind + str(widest_itemsize)
    return np.dtype(type_code)

Expand Down Expand Up @@ -449,8 +442,8 @@ def get_numpy_input(input, kind, orig_type, target_type):


def extract_un_data(pipe_out, sample_id, kind, target_type):
input = as_cpu(pipe_out[0]).at(sample_id)
out = as_cpu(pipe_out[1]).at(sample_id)
input = np.array(pipe_out[0][sample_id].as_cpu())
out = np.array(pipe_out[1][sample_id].as_cpu())
assert_equals(out.dtype, target_type)
in_np = get_numpy_input(input, kind, input.dtype.type, target_type)
return in_np, out
Expand All @@ -465,15 +458,15 @@ def extract_data(pipe_out, sample_id, kinds, target_type):
arity = len(kinds)
inputs = []
for i in range(arity):
dali_in = as_cpu(pipe_out[i]).at(sample_id)
dali_in = np.array(pipe_out[i][sample_id].as_cpu())
numpy_in = get_numpy_input(
dali_in,
kinds[i],
dali_in.dtype.type,
target_type if target_type is not None else dali_in.dtype.type,
)
inputs.append(numpy_in)
out = as_cpu(pipe_out[arity]).at(sample_id)
out = np.array(pipe_out[arity][sample_id].as_cpu())
return tuple(inputs) + (out,)


Expand Down
9 changes: 3 additions & 6 deletions dali/test/python/operator_1/test_batch_permute.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import nvidia.dali as dali
import nvidia.dali.fn as fn
from nvidia.dali.pipeline import Pipeline
import numpy as np
Expand Down Expand Up @@ -67,8 +66,7 @@ def _test_permute_batch(device, type):
for i in range(10):
orig, permuted, idxs = pipe.run()
idxs = [int(idxs.at(i)) for i in range(batch_size)]
if isinstance(orig, dali.backend.TensorListGPU):
orig = orig.as_cpu()
orig = orig.as_cpu()
ref = [orig.at(idx) for idx in idxs]
check_batch(permuted, ref, len(ref), 0, 0, "abc")

Expand All @@ -88,10 +86,9 @@ def _test_permute_batch_fixed(device):
idxs = [4, 8, 0, 6, 3, 5, 2, 9, 7, 1]
pipe.set_outputs(data, fn.permute_batch(data, indices=idxs))

for i in range(10):
for _ in range(10):
orig, permuted = pipe.run()
if isinstance(orig, dali.backend.TensorListGPU):
orig = orig.as_cpu()
orig = orig.as_cpu()
ref = [orig.at(idx) for idx in idxs]
check_batch(permuted, ref, len(ref), 0, 0, "abc")

Expand Down
6 changes: 2 additions & 4 deletions dali/test/python/operator_1/test_coin_flip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import numpy as np
import nvidia.dali as dali
from nvidia.dali.backend_impl import TensorListGPU
from nvidia.dali.pipeline import Pipeline


Expand Down Expand Up @@ -51,11 +50,10 @@ def shape_gen_f():
if shape_out is not None:
outputs += [shape_out]
pipe.set_outputs(*outputs)
outputs = pipe.run()
data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0]
(data_out,) = tuple(out.as_cpu() for out in pipe.run())
shapes_out = None
if max_shape is not None:
shapes_out = outputs[1].as_cpu() if isinstance(outputs[1], TensorListGPU) else outputs[1]
shapes_out = outputs[1].as_cpu()
p = p if p is not None else 0.5
for i in range(batch_size):
data = np.array(data_out[i])
Expand Down
16 changes: 7 additions & 9 deletions dali/test/python/operator_1/test_coord_flip.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,20 @@ def check_operator_coord_flip(device, batch_size, layout, shape, center_x, cente
eii1 = RandomDataIterator(batch_size, shape=shape, dtype=np.float32)
pipe = CoordFlipPipeline(device, batch_size, iter(eii1), layout, center_x, center_y, center_z)
for i in range(30):
outputs = pipe.run()
outputs0, outputs1, outputs2, outputs3, outputs4 = pipe.run()
outputs1 = outputs1.as_cpu()
for sample in range(batch_size):
in_coords = outputs[0].at(sample)
if device == "gpu":
out_coords = outputs[1].as_cpu().at(sample)
else:
out_coords = outputs[1].at(sample)
in_coords = outputs0.at(sample)
out_coords = outputs1.at(sample)
if in_coords.shape == () or in_coords.shape[0] == 0:
assert out_coords.shape == () or out_coords.shape[0] == 0
continue

flip_x = outputs[2].at(sample)
flip_y = outputs[3].at(sample)
flip_x = outputs2.at(sample)
flip_y = outputs3.at(sample)
flip_z = None
if len(layout) == 3:
flip_z = outputs[4].at(sample)
flip_z = outputs4.at(sample)
_, ndim = in_coords.shape

flip_dim = [flip_x, flip_y]
Expand Down
12 changes: 2 additions & 10 deletions dali/test/python/operator_1/test_crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from nvidia.dali import pipeline_def, fn
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import nvidia.dali as dali
import numpy as np
import os
from nose_utils import assert_raises
Expand Down Expand Up @@ -578,15 +577,8 @@ def check_crop_with_out_of_bounds_policy_support(
)
if fill_values is None:
fill_values = 0
for k in range(3):
outs = pipe.run()
out = outs[0]
in_data = outs[1]
if isinstance(out, dali.backend_impl.TensorListGPU):
out = out.as_cpu()
if isinstance(in_data, dali.backend_impl.TensorListGPU):
in_data = in_data.as_cpu()

for _ in range(3):
out, in_data = tuple(out.as_cpu() for out in pipe.run())
assert batch_size == len(out)
for idx in range(batch_size):
sample_in = in_data.at(idx)
Expand Down
28 changes: 7 additions & 21 deletions dali/test/python/operator_1/test_crop_mirror_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

import numpy as np
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.ops as ops
import nvidia.dali.types as types
Expand Down Expand Up @@ -766,15 +765,10 @@ def check_cmn_with_out_of_bounds_policy_support(

if fill_values is None:
fill_values = 0
for k in range(3):
outs = pipe.run()
out = outs[0]
in_data = outs[1]
mirror_data = outs[2]
if isinstance(out, dali.backend_impl.TensorListGPU):
out = out.as_cpu()
if isinstance(in_data, dali.backend_impl.TensorListGPU):
in_data = in_data.as_cpu()
for _ in range(3):
out, in_data, mirror_data = pipe.run()
out = out.as_cpu()
in_data = in_data.as_cpu()

assert batch_size == len(out)
for idx in range(batch_size):
Expand Down Expand Up @@ -901,20 +895,12 @@ def pipe():
batch_size = 10
p = pipe(batch_size=batch_size)
for _ in range(3):
outs = p.run()
for s in range(batch_size):
out, image_like, mean, std = [
(
np.array(o[s].as_cpu())
if isinstance(o, dali.backend_impl.TensorListGPU)
else np.array(o[s])
)
for o in outs
]
out, image_like, mean, std = tuple(out.as_cpu() for out in p.run())
ref_scale = scale or 1.0
ref_shift = shift or 0.0
ref_out = ref_scale * (image_like - mean) / std + ref_shift
np.testing.assert_allclose(out, ref_out, atol=ref_scale * 1e-6)
for s in range(batch_size):
np.testing.assert_allclose(out.at(s), ref_out.at(s), atol=ref_scale * 1e-6)


def test_per_sample_norm_args():
Expand Down
12 changes: 6 additions & 6 deletions dali/test/python/operator_1/test_input_promotion.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def test_slice_fn():
out_cpu = fn.slice(src, np.array([1, 1]), np.array([2, 1]), axes=[0, 1])
out_gpu = fn.slice(src.gpu(), np.array([1, 1]), np.array([2, 1]), axes=[0, 1])
pipe.set_outputs(out_cpu, out_gpu)
o = pipe.run()
assert np.array_equal(o[0].at(0), np.array([[14], [17]]))
assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]]))
out0, out1 = tuple(out.as_cpu() for out in pipe.run())
assert np.array_equal(out0.at(0), np.array([[14], [17]]))
assert np.array_equal(np.array(out1.at(0)), np.array([[14], [17]]))


def test_slice_ops():
Expand All @@ -56,9 +56,9 @@ def test_slice_ops():
out_cpu = slice_cpu(src, np.array([1, 1]), np.array([2, 1]))
out_gpu = slice_gpu(src.gpu(), np.array([1, 1]), np.array([2, 1]))
pipe.set_outputs(out_cpu, out_gpu)
o = pipe.run()
assert np.array_equal(o[0].at(0), np.array([[14], [17]]))
assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]]))
out0, out1 = tuple(out.as_cpu() for out in pipe.run())
assert np.array_equal(out0.at(0), np.array([[14], [17]]))
assert np.array_equal(out1.at(0), np.array([[14], [17]]))


def test_python_function():
Expand Down
6 changes: 1 addition & 5 deletions dali/test/python/operator_1/test_normal_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from nvidia.dali.pipeline import Pipeline
from nvidia.dali.backend_impl import TensorListGPU
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import numpy as np
Expand Down Expand Up @@ -106,10 +105,7 @@ def shape_gen_f():
pipe.set_outputs(out, shape_out, mean_arg, stddev_arg)
for i in range(niter):
outputs = pipe.run()
out, shapes, means, stddevs = tuple(
outputs[i].as_cpu() if isinstance(outputs[i], TensorListGPU) else outputs[i]
for i in range(len(outputs))
)
out, shapes, means, stddevs = tuple(outputs[i].as_cpu() for i in range(len(outputs)))
for sample_idx in range(batch_size):
sample = np.array(out[sample_idx])
if sample.shape == ():
Expand Down
4 changes: 1 addition & 3 deletions dali/test/python/operator_1/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from nvidia.dali.pipeline import Pipeline
from nvidia.dali import backend
import nvidia.dali.ops as ops
import numpy as np
from test_utils import dali_type
Expand Down Expand Up @@ -395,8 +394,7 @@ def iter_setup(self):


def to_list(tensor_list):
if isinstance(tensor_list, backend.TensorListGPU):
tensor_list = tensor_list.as_cpu()
tensor_list = tensor_list.as_cpu()
out = []
for i in range(len(tensor_list)):
out.append(tensor_list.at(i))
Expand Down
Loading
Loading