From 006621002a4fe7764285ed4e2f8fdadcbf8f4fd5 Mon Sep 17 00:00:00 2001 From: MikeDvorskiy Date: Thu, 28 Nov 2024 12:55:37 +0100 Subject: [PATCH] [oneDPL][ranges][merge] support size limit for output; fixes for __par_backend::__parallel_for --- include/oneapi/dpl/pstl/algorithm_impl.h | 6 +++--- include/oneapi/dpl/pstl/omp/parallel_for.h | 11 ++++++----- include/oneapi/dpl/pstl/parallel_backend.h | 3 +++ include/oneapi/dpl/pstl/parallel_backend_serial.h | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index b33c7707726..7ee34dacd73 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -2948,7 +2948,7 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, // merge //------------------------------------------------------------------------ -template<typename It1, typename It2, typename ItOut, typename _Comp> +template <typename It1, typename It2, typename ItOut, typename _Comp> std::pair<It1, It2> __brick_merge_2(It1 __it_1, It1 __it_1_e, It2 __it_2, It2 __it_2_e, ItOut __it_out, ItOut __it_out_e, _Comp __comp, /* __is_vector = */ std::false_type) @@ -3082,8 +3082,8 @@ __pattern_merge_2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _It1 __i //serial merge n elements, starting from input x and y, to [i, j) output range auto __res = __brick_merge_2(__it_1 + __r, __it_1 + __n_1, - __it_2 + __c, __it_2 + __n_2, - __it_out + __i, __it_out + __j, __comp, _IsVector{}); + __it_2 + __c, __it_2 + __n_2, + __it_out + __i, __it_out + __j, __comp, _IsVector{}); if(__j == __n_out) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 1a0ea24d798..917b3089059 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -29,10 +29,10 @@ namespace __omp_backend template <class _Index, class _Fp> void -__parallel_for_body(_Index __first, _Index __last, _Fp __f) +__parallel_for_body(_Index __first, _Index __last, _Fp __f, std::size_t __grainsize) { // 
initial partition of the iteration space into chunks - auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last); + auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last, __grainsize); // To avoid over-subscription we use taskloop for the nested parallelism _PSTL_PRAGMA(omp taskloop untied mergeable) @@ -49,20 +49,21 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f) template <class _ExecutionPolicy, class _Index, class _Fp> void -__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f, + std::size_t __grainsize = __default_chunk_size) { if (omp_in_parallel()) { // we don't create a nested parallel region in an existing parallel // region: just create tasks - oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); + oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize); } else { // in any case (nested or non-nested) one parallel region is created and // only one thread creates a set of tasks _PSTL_PRAGMA(omp parallel) - _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); } + _PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize); } } } diff --git a/include/oneapi/dpl/pstl/parallel_backend.h b/include/oneapi/dpl/pstl/parallel_backend.h index b243e8fb492..841a9357eb7 100644 --- a/include/oneapi/dpl/pstl/parallel_backend.h +++ b/include/oneapi/dpl/pstl/parallel_backend.h @@ -35,6 +35,9 @@ # endif #endif +//the parallel backend constants +#define _ONEDPL_MERGE_CUT_OFF 2000 + namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 6acd4b617f9..032306dbe69 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ 
b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -45,7 +45,7 @@ __cancel_execution(oneapi::dpl::__internal::__serial_backend_tag) template <class _ExecutionPolicy, class _Index, class _Fp> void __parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, - _Fp __f) + _Fp __f, std::size_t __grainsize = 1) { __f(__first, __last); }