Skip to content

Commit

Permalink
Add .as_cpu() to TensorCPU and TensorListCPU (#5751)
Browse files · Browse the repository at this point in the history
* Add as_cpu to TensorCPU and TensorListCPU

Signed-off-by: Joaquin Anton Guirao <janton@nvidia.com>
  • Branch information
jantonguirao authored Dec 30, 2024
1 parent 6530ad9 commit 4a409b9
Show file tree
Hide file tree
Showing 28 changed files with 108 additions and 186 deletions.
9 changes: 9 additions & 0 deletions dali/python/backend_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,11 @@ void ExposeTensor(py::module &m) {
Returns a `TensorGPU` object being a copy of this `TensorCPU`.
)code",
py::return_value_policy::take_ownership)
.def("as_cpu", [](Tensor<CPUBackend> &t) -> Tensor<CPUBackend>& {
return t;
},
R"code(Passthrough, since the object is already an instance of `TensorCPU`.)code",
py::return_value_policy::reference_internal)
.def("copy_to_external",
[](Tensor<CPUBackend> &t, py::object p) {
CopyToExternal<mm::memory_kind::host>(ctypes_void_ptr(p), t, AccessOrder::host(), false);
Expand Down Expand Up @@ -1174,6 +1179,10 @@ void ExposeTensorList(py::module &m) {
Returns a `TensorListGPU` object being a copy of this `TensorListCPU`.
)code",
py::return_value_policy::take_ownership)
.def("as_cpu", [](TensorList<CPUBackend> &t) -> TensorList<CPUBackend> & {
return t;
}, R"code(Passthrough, as it is already an instance of `TensorListCPU`.)code",
py::return_value_policy::reference_internal)
.def("layout", [](TensorList<CPUBackend> &t) {
return t.GetLayout().str();
})
Expand Down
2 changes: 2 additions & 0 deletions dali/python/nvidia/dali/tensors.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class TensorCPU:
@overload
def __init__(self, b, layout: str = ..., is_pinned: bool = ...) -> None: ...
def _as_gpu(self, *args, **kwargs) -> Any: ...
def as_cpu(self) -> TensorCPU: ...

# def __dlpack__(self, stream: Optional[int] = None) -> capsule: ...

Expand Down Expand Up @@ -92,6 +93,7 @@ class TensorListCPU:
def __init__(self, list_of_tensors: list, layout: str = ...) -> None: ...

# def _as_gpu(self, *args, **kwargs) -> TensorListGPU: ...
def as_cpu(self) -> TensorListCPU: ...

# def as_array(self) -> numpy.ndarray: ...
def as_array(self) -> Any: ...
Expand Down
4 changes: 1 addition & 3 deletions dali/test/python/auto_aug/test_augmentations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from PIL import Image, ImageEnhance, ImageOps
from nose2.tools import params, cartesian_params

import nvidia.dali.tensors as _tensors
from nvidia.dali import fn, pipeline_def
from nvidia.dali.auto_aug import augmentations as a
from nvidia.dali.auto_aug.core._utils import get_translations as _get_translations
Expand Down Expand Up @@ -90,8 +89,7 @@ def pipeline():
if dev == "gpu":
output = output.as_cpu()
output = [np.array(sample) for sample in output]
if isinstance(data, _tensors.TensorListGPU):
data = data.as_cpu()
data = data.as_cpu()
data = [np.array(sample) for sample in data]

if modality == "image":
Expand Down
5 changes: 2 additions & 3 deletions dali/test/python/auto_aug/test_rand_augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from scipy.stats import chisquare
from nose2.tools import params

from nvidia.dali import fn, tensors, types
from nvidia.dali import fn, types
from nvidia.dali import pipeline_def
from nvidia.dali.auto_aug import rand_augment
from nvidia.dali.auto_aug.core import augmentation
Expand All @@ -43,8 +43,7 @@ def debug_discrepancy_helper(*batch_pairs):
"""

def as_array_list(batch):
if isinstance(batch, tensors.TensorListGPU):
batch = batch.as_cpu()
batch = batch.as_cpu()
return [np.array(sample) for sample in batch]

batch_names = [name for _, _, name in batch_pairs]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import glob
import numpy as np
import itertools
import nvidia.dali as dali
from nvidia.dali import fn, pipeline_def, types
from test_utils import (
compare_pipelines,
Expand All @@ -41,9 +40,7 @@


def tensor_list_to_array(tensor_list):
if isinstance(tensor_list, dali.backend_impl.TensorListGPU):
tensor_list = tensor_list.as_cpu()
return tensor_list.as_array()
return tensor_list.as_cpu().as_array()


# Check whether a given pipeline is stateless
Expand Down
7 changes: 2 additions & 5 deletions dali/test/python/decoder/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import os
from itertools import cycle
from test_utils import get_dali_extra_path, is_mulit_gpu, skip_if_m60
from nvidia.dali.backend import TensorListGPU
from nose2.tools import params
from nose_utils import SkipTest, attr, assert_raises

Expand Down Expand Up @@ -76,8 +75,7 @@ def video_decoder_iter(batch_size, epochs=1, device="cpu", module=fn.experimenta
)
for _ in range(int((epochs * len(files) + batch_size - 1) / batch_size)):
(output,) = pipe.run()
if isinstance(output, TensorListGPU):
output = output.as_cpu()
output = output.as_cpu()
for i in range(batch_size):
yield np.array(output[i])

Expand All @@ -87,8 +85,7 @@ def ref_iter(epochs=1, device="cpu"):
for filename in filenames:
pipe = reference_pipeline(filename, device=device)
(output,) = pipe.run()
if isinstance(output, TensorListGPU):
output = output.as_cpu()
output = output.as_cpu()
yield np.array(output[0])


Expand Down
15 changes: 4 additions & 11 deletions dali/test/python/operator_1/test_arithmetic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import nvidia.dali.math as math
from nvidia.dali.tensors import TensorListGPU
import numpy as np
from nose_utils import attr, raises, assert_raises, assert_equals
from nose2.tools import params
Expand Down Expand Up @@ -226,12 +225,6 @@ def default_range(*types):
]


def as_cpu(tl):
if isinstance(tl, TensorListGPU):
return tl.as_cpu()
return tl


def max_dtype(kind, left_dtype, right_dtype):
return np.dtype(kind + str(max(left_dtype.itemsize, right_dtype.itemsize)))

Expand Down Expand Up @@ -449,8 +442,8 @@ def get_numpy_input(input, kind, orig_type, target_type):


def extract_un_data(pipe_out, sample_id, kind, target_type):
input = as_cpu(pipe_out[0]).at(sample_id)
out = as_cpu(pipe_out[1]).at(sample_id)
input = np.array(pipe_out[0][sample_id].as_cpu())
out = np.array(pipe_out[1][sample_id].as_cpu())
assert_equals(out.dtype, target_type)
in_np = get_numpy_input(input, kind, input.dtype.type, target_type)
return in_np, out
Expand All @@ -465,15 +458,15 @@ def extract_data(pipe_out, sample_id, kinds, target_type):
arity = len(kinds)
inputs = []
for i in range(arity):
dali_in = as_cpu(pipe_out[i]).at(sample_id)
dali_in = np.array(pipe_out[i][sample_id].as_cpu())
numpy_in = get_numpy_input(
dali_in,
kinds[i],
dali_in.dtype.type,
target_type if target_type is not None else dali_in.dtype.type,
)
inputs.append(numpy_in)
out = as_cpu(pipe_out[arity]).at(sample_id)
out = np.array(pipe_out[arity][sample_id].as_cpu())
return tuple(inputs) + (out,)


Expand Down
9 changes: 3 additions & 6 deletions dali/test/python/operator_1/test_batch_permute.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import nvidia.dali as dali
import nvidia.dali.fn as fn
from nvidia.dali.pipeline import Pipeline
import numpy as np
Expand Down Expand Up @@ -67,8 +66,7 @@ def _test_permute_batch(device, type):
for i in range(10):
orig, permuted, idxs = pipe.run()
idxs = [int(idxs.at(i)) for i in range(batch_size)]
if isinstance(orig, dali.backend.TensorListGPU):
orig = orig.as_cpu()
orig = orig.as_cpu()
ref = [orig.at(idx) for idx in idxs]
check_batch(permuted, ref, len(ref), 0, 0, "abc")

Expand All @@ -88,10 +86,9 @@ def _test_permute_batch_fixed(device):
idxs = [4, 8, 0, 6, 3, 5, 2, 9, 7, 1]
pipe.set_outputs(data, fn.permute_batch(data, indices=idxs))

for i in range(10):
for _ in range(10):
orig, permuted = pipe.run()
if isinstance(orig, dali.backend.TensorListGPU):
orig = orig.as_cpu()
orig = orig.as_cpu()
ref = [orig.at(idx) for idx in idxs]
check_batch(permuted, ref, len(ref), 0, 0, "abc")

Expand Down
7 changes: 3 additions & 4 deletions dali/test/python/operator_1/test_coin_flip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import numpy as np
import nvidia.dali as dali
from nvidia.dali.backend_impl import TensorListGPU
from nvidia.dali.pipeline import Pipeline


Expand Down Expand Up @@ -51,11 +50,11 @@ def shape_gen_f():
if shape_out is not None:
outputs += [shape_out]
pipe.set_outputs(*outputs)
outputs = pipe.run()
data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0]
outputs = tuple(out.as_cpu() for out in pipe.run())
data_out = outputs[0]
shapes_out = None
if max_shape is not None:
shapes_out = outputs[1].as_cpu() if isinstance(outputs[1], TensorListGPU) else outputs[1]
shapes_out = outputs[1]
p = p if p is not None else 0.5
for i in range(batch_size):
data = np.array(data_out[i])
Expand Down
9 changes: 3 additions & 6 deletions dali/test/python/operator_1/test_coord_flip.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,11 @@ def define_graph(self):
def check_operator_coord_flip(device, batch_size, layout, shape, center_x, center_y, center_z):
eii1 = RandomDataIterator(batch_size, shape=shape, dtype=np.float32)
pipe = CoordFlipPipeline(device, batch_size, iter(eii1), layout, center_x, center_y, center_z)
for i in range(30):
outputs = pipe.run()
for _ in range(30):
outputs = tuple(out.as_cpu() for out in pipe.run())
for sample in range(batch_size):
in_coords = outputs[0].at(sample)
if device == "gpu":
out_coords = outputs[1].as_cpu().at(sample)
else:
out_coords = outputs[1].at(sample)
out_coords = outputs[1].at(sample)
if in_coords.shape == () or in_coords.shape[0] == 0:
assert out_coords.shape == () or out_coords.shape[0] == 0
continue
Expand Down
12 changes: 2 additions & 10 deletions dali/test/python/operator_1/test_crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from nvidia.dali import pipeline_def, fn
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import nvidia.dali as dali
import numpy as np
import os
from nose_utils import assert_raises
Expand Down Expand Up @@ -578,15 +577,8 @@ def check_crop_with_out_of_bounds_policy_support(
)
if fill_values is None:
fill_values = 0
for k in range(3):
outs = pipe.run()
out = outs[0]
in_data = outs[1]
if isinstance(out, dali.backend_impl.TensorListGPU):
out = out.as_cpu()
if isinstance(in_data, dali.backend_impl.TensorListGPU):
in_data = in_data.as_cpu()

for _ in range(3):
out, in_data = tuple(out.as_cpu() for out in pipe.run())
assert batch_size == len(out)
for idx in range(batch_size):
sample_in = in_data.at(idx)
Expand Down
33 changes: 10 additions & 23 deletions dali/test/python/operator_1/test_crop_mirror_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

import numpy as np
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.ops as ops
import nvidia.dali.types as types
Expand Down Expand Up @@ -766,15 +765,10 @@ def check_cmn_with_out_of_bounds_policy_support(

if fill_values is None:
fill_values = 0
for k in range(3):
outs = pipe.run()
out = outs[0]
in_data = outs[1]
mirror_data = outs[2]
if isinstance(out, dali.backend_impl.TensorListGPU):
out = out.as_cpu()
if isinstance(in_data, dali.backend_impl.TensorListGPU):
in_data = in_data.as_cpu()
for _ in range(3):
out, in_data, mirror_data = pipe.run()
out = out.as_cpu()
in_data = in_data.as_cpu()

assert batch_size == len(out)
for idx in range(batch_size):
Expand Down Expand Up @@ -900,21 +894,14 @@ def pipe():

batch_size = 10
p = pipe(batch_size=batch_size)
ref_scale = scale or 1.0
ref_shift = shift or 0.0
for _ in range(3):
outs = p.run()
outs = tuple(np.array(out.as_cpu()) for out in p.run())
for s in range(batch_size):
out, image_like, mean, std = [
(
np.array(o[s].as_cpu())
if isinstance(o, dali.backend_impl.TensorListGPU)
else np.array(o[s])
)
for o in outs
]
ref_scale = scale or 1.0
ref_shift = shift or 0.0
ref_out = ref_scale * (image_like - mean) / std + ref_shift
np.testing.assert_allclose(out, ref_out, atol=ref_scale * 1e-6)
out, image_like, mean, std = tuple(np.array(o[s]) for o in outs)
ref_out = ref_scale * (image_like - mean) / std + ref_shift
np.testing.assert_allclose(out, ref_out, atol=ref_scale * 1e-6)


def test_per_sample_norm_args():
Expand Down
12 changes: 6 additions & 6 deletions dali/test/python/operator_1/test_input_promotion.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def test_slice_fn():
out_cpu = fn.slice(src, np.array([1, 1]), np.array([2, 1]), axes=[0, 1])
out_gpu = fn.slice(src.gpu(), np.array([1, 1]), np.array([2, 1]), axes=[0, 1])
pipe.set_outputs(out_cpu, out_gpu)
o = pipe.run()
assert np.array_equal(o[0].at(0), np.array([[14], [17]]))
assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]]))
out0, out1 = tuple(out.as_cpu() for out in pipe.run())
assert np.array_equal(out0.at(0), np.array([[14], [17]]))
assert np.array_equal(np.array(out1.at(0)), np.array([[14], [17]]))


def test_slice_ops():
Expand All @@ -56,9 +56,9 @@ def test_slice_ops():
out_cpu = slice_cpu(src, np.array([1, 1]), np.array([2, 1]))
out_gpu = slice_gpu(src.gpu(), np.array([1, 1]), np.array([2, 1]))
pipe.set_outputs(out_cpu, out_gpu)
o = pipe.run()
assert np.array_equal(o[0].at(0), np.array([[14], [17]]))
assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]]))
out0, out1 = tuple(out.as_cpu() for out in pipe.run())
assert np.array_equal(out0.at(0), np.array([[14], [17]]))
assert np.array_equal(out1.at(0), np.array([[14], [17]]))


def test_python_function():
Expand Down
6 changes: 1 addition & 5 deletions dali/test/python/operator_1/test_normal_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from nvidia.dali.pipeline import Pipeline
from nvidia.dali.backend_impl import TensorListGPU
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import numpy as np
Expand Down Expand Up @@ -106,10 +105,7 @@ def shape_gen_f():
pipe.set_outputs(out, shape_out, mean_arg, stddev_arg)
for i in range(niter):
outputs = pipe.run()
out, shapes, means, stddevs = tuple(
outputs[i].as_cpu() if isinstance(outputs[i], TensorListGPU) else outputs[i]
for i in range(len(outputs))
)
out, shapes, means, stddevs = tuple(outputs[i].as_cpu() for i in range(len(outputs)))
for sample_idx in range(batch_size):
sample = np.array(out[sample_idx])
if sample.shape == ():
Expand Down
4 changes: 1 addition & 3 deletions dali/test/python/operator_1/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from nvidia.dali.pipeline import Pipeline
from nvidia.dali import backend
import nvidia.dali.ops as ops
import numpy as np
from test_utils import dali_type
Expand Down Expand Up @@ -395,8 +394,7 @@ def iter_setup(self):


def to_list(tensor_list):
if isinstance(tensor_list, backend.TensorListGPU):
tensor_list = tensor_list.as_cpu()
tensor_list = tensor_list.as_cpu()
out = []
for i in range(len(tensor_list)):
out.append(tensor_list.at(i))
Expand Down
Loading

0 comments on commit 4a409b9

Please sign in to comment.