Skip to content

Commit

Permalink
absent -> present logic
Browse files Browse the repository at this point in the history
  • Loading branch information
dmitriy-sobolev committed Dec 10, 2024
1 parent 888038b commit ef2c5bf
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 91 deletions.
16 changes: 8 additions & 8 deletions include/oneapi/dpl/internal/reduce_by_segment_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,12 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __keys);
auto __seg_ends_acc = __seg_ends.template get_access<sycl::access_mode::write>(__cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceCountKernel>(
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_reduce_count_kernel,
#endif
sycl::nd_item<1> __item) {
Expand Down Expand Up @@ -319,11 +319,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
__cgh.depends_on(__seg_end_identification);
auto __seg_ends_acc = __seg_ends.template get_access<sycl::access_mode::read>(__cgh);
auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access<sycl::access_mode::read_write>(__cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceOffsetKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_reduce_offset_kernel,
#endif
sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
Expand All @@ -342,11 +342,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
auto __partials_acc = __partials.template get_access<sycl::access_mode::read_write>(__cgh);
auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access<sycl::access_mode::read>(__cgh);
__dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReduceWgKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_reduce_wg_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
Expand Down Expand Up @@ -465,11 +465,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy
__dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh);

__cgh.depends_on(__wg_reduce);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegReducePrefixKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_reduce_prefix_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
Expand Down
8 changes: 4 additions & 4 deletions include/oneapi/dpl/internal/scan_by_segment_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,11 @@ struct __sycl_scan_by_segment_impl

__dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh);

#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegScanWgKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_scan_wg_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
Expand Down Expand Up @@ -268,11 +268,11 @@ struct __sycl_scan_by_segment_impl
__dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh);

__dpl_sycl::__local_accessor<bool> __loc_seg_ends_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_SegScanPrefixKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__seg_scan_prefix_kernel,
#endif
sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) {
Expand Down
8 changes: 4 additions & 4 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,11 +324,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
auto __temp_acc = __result_and_scratch.template __get_scratch_acc<sycl::access_mode::write>(
__cgh, __dpl_sycl::__no_init{});
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle());
#endif
__cgh.parallel_for<_LocalScanKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel_1,
#endif
sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) {
Expand All @@ -345,11 +345,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
__cgh.depends_on(__submit_event);
auto __temp_acc = __result_and_scratch.template __get_scratch_acc<sycl::access_mode::read_write>(__cgh);
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle());
#endif
__cgh.parallel_for<_GroupScanKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel_2,
#endif
// TODO: try to balance work between several workgroups instead of one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,11 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, :
oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng);
// an accessor per work-group with value counters from each work-item
auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) {
Expand Down Expand Up @@ -299,11 +299,11 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size
__hdl.depends_on(__dependency_event);
// access the counters for all work groups
oneapi::dpl::__ranges::__require_access(__hdl, __count_rng);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) {
Expand Down Expand Up @@ -544,11 +544,11 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments,

typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl);

#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__hdl.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__hdl.parallel_for<_KernelName>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
//Each SYCL work group processes one data segment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,11 @@ struct __parallel_transform_reduce_impl
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__cgh.use_kernel_bundle(__kernel.get_kernel_bundle());
#endif
__cgh.parallel_for<_ReduceKernel>(
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
__kernel,
#endif
sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ ::std::size_t
__kernel_work_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __kernel)
{
const sycl::device& __device = __policy.queue().get_device();
#if !_ONEDPL_SYCL2020_KERNEL_DEVICE_API_ABSENT
#if _ONEDPL_SYCL2020_KERNEL_DEVICE_API_PRESENT
return __kernel.template get_info<sycl::info::kernel_device_specific::work_group_size>(__device);
#else
return __kernel.template get_work_group_info<sycl::info::kernel_work_group::work_group_size>(__device);
Expand All @@ -127,7 +127,7 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __
const sycl::device& __device = __policy.queue().get_device();
[[maybe_unused]] const ::std::size_t __wg_size = __kernel_work_group_size(__policy, __kernel);
const ::std::uint32_t __sg_size =
#if !_ONEDPL_SYCL2020_KERNEL_DEVICE_API_ABSENT
#if _ONEDPL_SYCL2020_KERNEL_DEVICE_API_PRESENT
__kernel.template get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
__device
# if _ONEDPL_LIBSYCL_VERSION < 60000
Expand Down Expand Up @@ -264,7 +264,7 @@ class __kernel_compiler
static_assert(__kernel_count > 0, "At least one kernel name should be provided");

public:
#if !_ONEDPL_SYCL2020_KERNEL_BUNDLE_ABSENT
#if _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT
template <typename _Exec>
static auto
__compile(_Exec&& __exec)
Expand Down Expand Up @@ -539,7 +539,7 @@ struct __result_and_scratch_storage
inline bool
__use_USM_host_allocations(sycl::queue __queue)
{
#if !_ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_ABSENT
#if _ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_PRESENT
auto __device = __queue.get_device();
if (!__device.is_gpu())
return false;
Expand All @@ -556,7 +556,7 @@ struct __result_and_scratch_storage
inline bool
__use_USM_allocations(sycl::queue __queue)
{
#if !_ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_ABSENT
#if _ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_PRESENT
return __queue.get_device().has(sycl::aspect::usm_device_allocations);
#else
return false;
Expand Down Expand Up @@ -609,7 +609,7 @@ struct __result_and_scratch_storage
static auto
__get_usm_or_buffer_accessor_ptr(const _Acc& __acc, std::size_t __scratch_n = 0)
{
#if !_ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_ABSENT
#if _ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_PRESENT
return __acc.__get_pointer();
#else
return &__acc[__scratch_n];
Expand All @@ -620,7 +620,7 @@ struct __result_and_scratch_storage
auto
__get_result_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const
{
#if !_ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_ABSENT
#if _ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host && __supports_USM_device)
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __result_buf.get(), __prop_list);
else if (__supports_USM_device)
Expand All @@ -636,7 +636,7 @@ struct __result_and_scratch_storage
auto
__get_scratch_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const
{
#if !_ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_ABSENT
#if _ONEDPL_SYCL2020_UNIFIED_USM_BUFFER_PRESENT
if (__use_USM_host || __supports_USM_device)
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __scratch_buf.get(), __prop_list);
return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __sycl_buf.get(), __prop_list);
Expand Down
Loading

0 comments on commit ef2c5bf

Please sign in to comment.