Skip to content

Commit

Permalink
[oneDPL][ranges][merge] support size limit for output; fixes for __pa…
Browse files Browse the repository at this point in the history
…r_backend::__parallel_for
  • Loading branch information
MikeDvorskiy committed Nov 28, 2024
1 parent 6ec465c commit 0066210
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 9 deletions.
6 changes: 3 additions & 3 deletions include/oneapi/dpl/pstl/algorithm_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2948,7 +2948,7 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
// merge
//------------------------------------------------------------------------

template<std::random_access_iterator It1, std::random_access_iterator It2, std::random_access_iterator ItOut, typename _Comp>
template<typename It1, typename It2, typename ItOut, typename _Comp>
std::pair<It1, It2>
__brick_merge_2(It1 __it_1, It1 __it_1_e, It2 __it_2, It2 __it_2_e, ItOut __it_out, ItOut __it_out_e, _Comp __comp,
/* __is_vector = */ std::false_type)
Expand Down Expand Up @@ -3082,8 +3082,8 @@ __pattern_merge_2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _It1 __i

// Serial merge of the two inputs, starting at offset __r in the first and __c in the second,
// into the output sub-range [__it_out + __i, __it_out + __j)
auto __res = __brick_merge_2(__it_1 + __r, __it_1 + __n_1,
__it_2 + __c, __it_2 + __n_2,
__it_out + __i, __it_out + __j, __comp, _IsVector{});
__it_2 + __c, __it_2 + __n_2,
__it_out + __i, __it_out + __j, __comp, _IsVector{});

if(__j == __n_out)
{
Expand Down
11 changes: 6 additions & 5 deletions include/oneapi/dpl/pstl/omp/parallel_for.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ namespace __omp_backend

template <class _Index, class _Fp>
void
__parallel_for_body(_Index __first, _Index __last, _Fp __f)
__parallel_for_body(_Index __first, _Index __last, _Fp __f, std::size_t __grainsize)
{
// initial partition of the iteration space into chunks
auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last);
auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last, __grainsize);

// To avoid over-subscription we use taskloop for the nested parallelism
_PSTL_PRAGMA(omp taskloop untied mergeable)
Expand All @@ -49,20 +49,21 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f)

// OpenMP backend entry point for a parallel loop over the index range delimited by
// __first and __last.
//
// The range, the functor __f and __grainsize are forwarded to __parallel_for_body.
// The execution policy argument is unnamed and is not used in this function.
// __grainsize defaults to __default_chunk_size.
//
// If the caller is already inside an OpenMP parallel region (omp_in_parallel()), the body
// is invoked directly; otherwise a parallel region is opened and the body is invoked from
// an `omp single nowait` construct.
//
// NOTE(review): this listing interleaves what appear to be the pre-change and post-change
// lines of the commit: the signature and each __parallel_for_body call appear twice, and
// the variants carrying __grainsize are presumably the post-change ones.
template <class _ExecutionPolicy, class _Index, class _Fp>
void
__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f,
std::size_t __grainsize = __default_chunk_size)
{
if (omp_in_parallel())
{
// Already inside a parallel region: do not open a nested one,
// just let __parallel_for_body create its tasks.
oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f);
oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize);
}
else
{
// Not inside a parallel region: open one here and have a single thread
// (omp single) call __parallel_for_body to create the set of tasks.
_PSTL_PRAGMA(omp parallel)
_PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); }
_PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize); }
}
}

Expand Down
3 changes: 3 additions & 0 deletions include/oneapi/dpl/pstl/parallel_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
# endif
#endif

// Parallel backend constants.
// NOTE(review): judging by its name, _ONEDPL_MERGE_CUT_OFF is a size threshold used by the
// merge algorithms; its uses are not visible here, so confirm its exact meaning and unit
// against the call sites.
#define _ONEDPL_MERGE_CUT_OFF 2000

namespace oneapi
{
namespace dpl
Expand Down
2 changes: 1 addition & 1 deletion include/oneapi/dpl/pstl/parallel_backend_serial.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ __cancel_execution(oneapi::dpl::__internal::__serial_backend_tag)
// Serial backend version of __parallel_for.
//
// Calls __f(__first, __last) exactly once, directly from the caller, so the whole range is
// handled by a single invocation of the functor. The execution policy argument is unnamed
// and unused. __grainsize (default 1) is accepted in the signature but is not read by the body.
//
// NOTE(review): the two consecutive `_Fp __f...` lines appear to be the pre-change and
// post-change parameter lists of the commit; the one with __grainsize is presumably the
// post-change one.
template <class _ExecutionPolicy, class _Index, class _Fp>
void
__parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
_Fp __f)
_Fp __f, std::size_t __grainsize = 1)
{
// No partitioning in the serial backend: pass the full range to the functor in one call.
__f(__first, __last);
}
Expand Down

0 comments on commit 0066210

Please sign in to comment.