Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[onedpl][ranges] copy_if implementation for sized output #1898

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 148 additions & 1 deletion include/oneapi/dpl/pstl/algorithm_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,25 @@ __brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterat
return ::std::copy_if(__first, __last, __result, __pred);
}

// Serial brick of copy_if with a limit on the number of written elements.
// Walks [__first, __last) and copies every element satisfying __pred to __result,
// stopping as soon as either the input is exhausted or __m elements have been written.
// Returns {input position where the walk stopped, output position past the last written element}.
// When the limit is hit, the returned input position is one past the last copied element.
template <class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
std::pair<_ForwardIterator, _OutputIterator>
__brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
                typename std::iterator_traits<_OutputIterator>::difference_type __m, _UnaryPredicate __pred,
                /*vector=*/::std::false_type) noexcept
{
    while (__first != __last && __m > 0)
    {
        // Dereference once; the same reference feeds both the predicate and the assignment.
        const auto& __elem = *__first;
        if (__pred(__elem))
        {
            *__result = __elem;
            ++__result;
            --__m; // one less free slot in the output
        }
        ++__first;
    }
    return {__first, __result};
}

template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _UnaryPredicate>
_RandomAccessIterator2
__brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
Expand All @@ -1206,7 +1225,20 @@ __brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _
#if (_PSTL_MONOTONIC_PRESENT || _ONEDPL_MONOTONIC_PRESENT)
return __unseq_backend::__simd_copy_if(__first, __last - __first, __result, __pred);
#else
return ::std::copy_if(__first, __last, __result, __pred);
return __brick_copy_if(__first, __last, __result, __pred, std::false_type{});
#endif
}

// Output-bounded copy_if brick, overload selected by the std::true_type ("vector") tag.
// __m is forwarded unchanged to the callee; presumably it is the maximum number of
// elements that may be written, as in the std::false_type overload - TODO confirm for the SIMD path.
// Returns a pair of iterators (input, output) as produced by the callee.
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _UnaryPredicate>
std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
__brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
typename std::iterator_traits<_RandomAccessIterator2>::difference_type __m, _UnaryPredicate __pred,
/*vector=*/::std::true_type) noexcept
{
// The SIMD backend is used only when one of the "monotonic present" macros evaluates to true.
#if (_PSTL_MONOTONIC_PRESENT || _ONEDPL_MONOTONIC_PRESENT)
// The backend receives the input length (__last - __first) instead of an end iterator.
return __unseq_backend::__simd_copy_if(__first, __last - __first, __result, __m, __pred);
#else
// Otherwise delegate to the overload taking the std::false_type tag.
return __brick_copy_if(__first, __last, __result, __m, __pred, std::false_type{});
#endif
}

Expand All @@ -1233,6 +1265,28 @@ __brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __r
return ::std::make_pair(__count_true, __size - __count_true);
}

// Serial brick that fills a boolean mask for an output-bounded copy_if.
// For each visited element of [__first, __last) stores __pred(element) into the corresponding
// __mask slot and counts the elements for which the predicate holds. The walk stops when the
// input is exhausted or when the count of selected elements reaches __m.
// Mask slots for elements that were not visited are left untouched.
// Returns {number of selected elements, input position where the walk stopped}.
template <class _DifferenceType, class _ForwardIterator, class _Bound, class _UnaryPredicate>
std::pair<_DifferenceType, _ForwardIterator>
__brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, _Bound __m, bool* __restrict __mask,
                    _UnaryPredicate __pred, /*vector=*/::std::false_type) noexcept
{
    static_assert(__is_random_access_iterator_v<_ForwardIterator>,
                  "Pattern-brick error. Should be a random access iterator.");

    auto __count_true = _DifferenceType(0);

    for (; __first != __last && __count_true < __m; ++__first, ++__mask)
    {
        *__mask = __pred(*__first);
        if (*__mask)
        {
            ++__count_true;
        }
    }
    return {__count_true, __first};
}

template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
::std::pair<_DifferenceType, _DifferenceType>
__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred,
Expand All @@ -1242,6 +1296,15 @@ __brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last,
return ::std::make_pair(__result, (__last - __first) - __result);
}

// Output-bounded mask computation, overload selected by the std::true_type ("vector") tag.
// Delegates to __unseq_backend::__simd_calc_mask_1, passing the input length instead of an end
// iterator, and converts the offset reported in the backend result into an input iterator.
// Returns {backend result .first, __first advanced by backend result .second}.
template <class _DifferenceType, class _RandomAccessIterator, class _Bound, class _UnaryPredicate>
std::pair<_DifferenceType, _RandomAccessIterator>
__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, _Bound __m, bool* __mask,
                    _UnaryPredicate __pred, /*vector=*/::std::true_type) noexcept
{
    const auto __simd_res = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __m, __mask, __pred);
    // .second is an offset relative to __first; turn it back into an iterator for the caller.
    const _RandomAccessIterator __stop = __first + __simd_res.second;
    return {__simd_res.first, __stop};
}

template <class _ForwardIterator, class _OutputIterator, class _Assigner>
void
__brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, bool* __mask,
Expand All @@ -1257,6 +1320,23 @@ __brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputI
}
}

// Serial brick that copies the elements flagged in __mask, limited to __m written elements.
// __mask[i] corresponds to the i-th element of [__first, __last). For every flagged element
// __assigner(input_iterator, output_iterator) is invoked and the output position advances.
// The walk stops when the input is exhausted or when __m elements have been written.
// Returns {input position where the walk stopped, output position past the last written element}.
template <class _ForwardIterator, class _OutputIterator, class _Bound, class _Assigner>
std::pair<_ForwardIterator, _OutputIterator>
__brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _Bound __m,
                     bool* __mask, _Assigner __assigner, /*vector=*/::std::false_type) noexcept
{
    while (__first != __last && __m > 0)
    {
        if (*__mask)
        {
            // The assigner receives iterators, not values.
            __assigner(__first, __result);
            ++__result;
            --__m;
        }
        ++__first;
        ++__mask;
    }
    return {__first, __result};
}

template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Assigner>
void
__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
Expand All @@ -1269,6 +1349,18 @@ __brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __la
#endif
}

// Output-bounded copy-by-mask brick, overload selected by the std::true_type ("vector") tag.
// __m, __mask and __assigner are forwarded unchanged to the callee.
// The return type is deduced from whichever callee is compiled in.
// NOTE(review): the std::false_type overload returns a pair of (input, output) iterators;
// the SIMD backend is presumably expected to return the same - confirm.
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Bound, class _Assigner>
auto
__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
_Bound __m, bool* __restrict __mask, _Assigner __assigner, /*vector=*/::std::true_type) noexcept
{
// The SIMD backend is used only when one of the "monotonic present" macros evaluates to true.
#if (_PSTL_MONOTONIC_PRESENT || _ONEDPL_MONOTONIC_PRESENT)
// The backend receives the input length (__last - __first) instead of an end iterator.
return __unseq_backend::__simd_copy_by_mask(__first, __last - __first, __result, __m, __mask, __assigner);
#else
// Otherwise delegate to the overload taking the std::false_type tag.
return __internal::__brick_copy_by_mask(__first, __last, __result, __m, __mask, __assigner, ::std::false_type());
#endif
}

template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2>
void
__brick_partition_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
Expand Down Expand Up @@ -1312,6 +1404,16 @@ __pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIt
return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{});
}

// Output-bounded copy_if pattern for the serial and parallel-forward tags (enforced by the static_assert).
// The execution policy argument is not used; the work is done by __internal::__brick_copy_if,
// with the vectorization choice taken from _Tag::__is_vector.
// __n_out is forwarded to the brick as its bound argument.
// Returns the pair of iterators produced by the brick.
// NOTE(review): the __parallel_tag overload of this pattern replaces a negative __n_out with the
// input length before use; no such normalisation happens here - confirm this difference is intended.
template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
std::pair<_ForwardIterator, _OutputIterator>
__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
_UnaryPredicate __pred, typename std::iterator_traits<_OutputIterator>::difference_type __n_out) noexcept
{
static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>);

// Note the argument order: the brick takes the bound before the predicate.
return __internal::__brick_copy_if(__first, __last, __result, __n_out, __pred, typename _Tag::__is_vector{});
}

template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
class _UnaryPredicate>
_RandomAccessIterator2
Expand All @@ -1322,6 +1424,7 @@ __pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA

typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
const _DifferenceType __n = __last - __first;

if (_DifferenceType(1) < __n)
{
__par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n);
Expand Down Expand Up @@ -1349,6 +1452,50 @@ __pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA
return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{});
}

// Output-bounded copy_if pattern for the parallel tag.
// Copies the elements of [__first, __last) satisfying __pred to __result, passing __n_out to the
// bricks and to the scan as the output bound. A negative __n_out is replaced by the input length.
// Returns {input iterator, output iterator} built from the totals reported by the scan
// (or the result of the brick for inputs of at most one element).
//
// For inputs longer than one element the work is expressed as a strict scan over a boolean mask:
//   - reduce stage: fills the mask for a chunk and yields the number of selected elements in it;
//   - combine:      std::plus over those counts;
//   - scan stage:   copies the flagged elements of a chunk to __result + __initial, limited to
//                   __len_out elements, and yields {consumed input count, written output count}
//                   relative to the chunk start;
//   - apex:         stores the totals handed over by the scan.
// NOTE(review): the extra __len_out / two-argument apex / trailing __n_out parameters rely on an
// extended __parallel_strict_scan interface that is not visible here - confirm against the backend.
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
          class _UnaryPredicate>
std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
                  _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred,
                  typename std::iterator_traits<_RandomAccessIterator2>::difference_type __n_out)
{
    using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;

    using _DifferenceType = typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type;
    const _DifferenceType __n = __last - __first;

    // A negative bound is replaced by the input length.
    if (__n_out < 0)
        __n_out = __n;

    if (_DifferenceType(1) < __n)
    {
        // One mask slot per input element.
        __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n);
        return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf, __n_out]() {
            bool* __mask = __mask_buf.get();
            _DifferenceType __res_in{}, __res_out{};
            __par_backend::__parallel_strict_scan(
                __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
                [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
                    return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
                                                                            __n_out, __mask + __i, __pred, _IsVector{})
                        .first;
                },
                ::std::plus<_DifferenceType>(), // Combine
                [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial,
                    _DifferenceType __len_out) { // Scan
                    auto __res = __internal::__brick_copy_by_mask(
                        __first + __i, __first + (__i + __len), __result + __initial, __len_out, __mask + __i,
                        [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
                    // Convert the returned iterators into counts relative to this chunk.
                    return std::make_pair(__res.first - (__first + __i), __res.second - (__result + __initial));
                },
                [&__res_in, &__res_out](auto __total_in, auto __total_out) { // Apex
                    __res_in = __total_in;
                    __res_out = __total_out;
                },
                __n_out);
            return std::make_pair(__first + __res_in, __result + __res_out);
        });
    }
    // trivial sequence - use serial algorithm
    return __internal::__brick_copy_if(__first, __last, __result, __n_out, __pred, _IsVector{});
}

//------------------------------------------------------------------------
// count
//------------------------------------------------------------------------
Expand Down
22 changes: 18 additions & 4 deletions include/oneapi/dpl/pstl/algorithm_ranges_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,22 +420,36 @@ __pattern_copy_if_ranges(_Tag __tag, _ExecutionPolicy&& __exec, _InRange&& __in_
auto __pred_1 = [__pred, __proj](auto&& __val) { return std::invoke(__pred, std::invoke(__proj,
std::forward<decltype(__val)>(__val)));};

auto __res_idx = oneapi::dpl::__internal::__pattern_copy_if(__tag, std::forward<_ExecutionPolicy>(__exec),
auto __res = oneapi::dpl::__internal::__pattern_copy_if(__tag, std::forward<_ExecutionPolicy>(__exec),
std::ranges::begin(__in_r), std::ranges::begin(__in_r) + std::ranges::size(__in_r),
std::ranges::begin(__out_r), __pred_1) - std::ranges::begin(__out_r);
std::ranges::begin(__out_r), __pred_1, std::ranges::size(__out_r));

using __return_type = std::ranges::copy_if_result<std::ranges::borrowed_iterator_t<_InRange>,
std::ranges::borrowed_iterator_t<_OutRange>>;

return __return_type{std::ranges::begin(__in_r) + std::ranges::size(__in_r), std::ranges::begin(__out_r) + __res_idx};
return __return_type{__res.first, __res.second};
}

// Serial, non-vectorized copy_if over ranges with a bounded output.
// Copies the elements of __in_r whose projection satisfies __pred into __out_r and stops when
// either the input is exhausted or the end of __out_r is reached, so it never writes past the
// output range. The execution policy argument is not used.
// Returns std::ranges::copy_if_result{input position where the walk stopped,
//                                     output position past the last written element}.
// When the output fills up, the returned input position is one past the last copied element.
template<typename _ExecutionPolicy, typename _InRange, typename _OutRange, typename _Pred, typename _Proj>
auto
__pattern_copy_if_ranges(__serial_tag</*IsVector*/std::false_type>, _ExecutionPolicy&& __exec, _InRange&& __in_r, _OutRange&& __out_r,
                         _Pred __pred, _Proj __proj)
{
    using __return_type = std::ranges::copy_if_result<std::ranges::borrowed_iterator_t<_InRange>,
                                                      std::ranges::borrowed_iterator_t<_OutRange>>;

    auto __it_in = std::ranges::begin(__in_r);
    auto __it_out = std::ranges::begin(__out_r);
    // The sentinels do not change during the walk; fetch them once.
    const auto __end_in = std::ranges::end(__in_r);
    const auto __end_out = std::ranges::end(__out_r);

    for (; __it_in != __end_in && __it_out != __end_out; ++__it_in)
    {
        // The predicate sees the projected value; the element itself is what gets copied.
        if (std::invoke(__pred, std::invoke(__proj, *__it_in)))
        {
            *__it_out = *__it_in;
            ++__it_out;
        }
    }

    return __return_type{__it_in, __it_out};
}

//---------------------------------------------------------------------------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -901,15 +901,14 @@ __pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterato
if (__first == __last)
return __result_first;

_It1DifferenceType __n = __last - __first;

auto __n = __last - __first;
auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
auto __buf1 = __keep1(__first, __last);
auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>();
auto __buf2 = __keep2(__result_first, __result_first + __n);

auto __res = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
__buf1.all_view(), __buf2.all_view(), __n, __pred);
__buf1.all_view(), __buf2.all_view(), __pred);

::std::size_t __num_copied = __res.get(); //is a blocking call
return __result_first + __num_copied;
Expand Down
21 changes: 12 additions & 9 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,19 +536,22 @@ __pattern_count_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate,
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
oneapi::dpl::__internal::__difference_t<_Range2>
std::pair<oneapi::dpl::__internal::__difference_t<_Range1>, oneapi::dpl::__internal::__difference_t<_Range2>>
__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_Predicate __pred, _Assign __assign)
{
oneapi::dpl::__internal::__difference_t<_Range2> __n = __rng1.size();
if (__n == 0)
return 0;
using _Index = std::size_t; //TODO
_Index __n = __rng1.size();
if (__n == 0 || __rng2.empty())
return {0, 0};

auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if(
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if_out_lim(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
std::forward<_Range2>(__rng2), __n, __pred, __assign);
std::forward<_Range2>(__rng2), __pred, __assign);

return __res.get(); //is a blocking call
std::array<_Index, 2> __idx;
__res.get_values(__idx); //a blocking call
return {__idx[1], __idx[0]}; //__parallel_copy_if_out_lim returns {last index in output, last index in input}
}

#if _ONEDPL_CPP20_RANGES_PRESENT
Expand All @@ -561,15 +564,15 @@ __pattern_copy_if_ranges(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e
auto __pred_1 = [__pred, __proj](auto&& __val) { return std::invoke(__pred, std::invoke(__proj,
std::forward<decltype(__val)>(__val)));};

auto __res_idx = oneapi::dpl::__internal::__ranges::__pattern_copy_if(__tag,
auto __res = oneapi::dpl::__internal::__ranges::__pattern_copy_if(__tag,
std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::views::all_read(__in_r),
oneapi::dpl::__ranges::views::all_write(__out_r), __pred_1,
oneapi::dpl::__internal::__pstl_assign());

using __return_t = std::ranges::copy_if_result<std::ranges::borrowed_iterator_t<_InRange>,
std::ranges::borrowed_iterator_t<_OutRange>>;

return __return_t{std::ranges::begin(__in_r) + std::ranges::size(__in_r), std::ranges::begin(__out_r) + __res_idx};
return __return_t{std::ranges::begin(__in_r) + __res.first, std::ranges::begin(__out_r) + __res.second};
}
#endif //_ONEDPL_CPP20_RANGES_PRESENT

Expand Down
Loading
Loading