From 34d7bf8ea3be6b7434e8da465b19e0340c847c16 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 14 Mar 2023 15:20:27 +0100 Subject: [PATCH 1/8] Threaded fn::backend::naive backend --- include/gridtools/common/omp.hpp | 2 +- include/gridtools/fn/backend/naive.hpp | 49 +++++++++++++++++++----- include/gridtools/thread_pool/omp.hpp | 6 +-- tests/include/fn_select.hpp | 14 +++++-- tests/unit_tests/fn/test_fn_executor.cpp | 4 +- tests/unit_tests/fn/test_fn_run.cpp | 4 +- 6 files changed, 56 insertions(+), 23 deletions(-) diff --git a/include/gridtools/common/omp.hpp b/include/gridtools/common/omp.hpp index 9e3d95b390..f1f6470030 100644 --- a/include/gridtools/common/omp.hpp +++ b/include/gridtools/common/omp.hpp @@ -9,7 +9,7 @@ */ #pragma once -#ifdef _OPENMP +#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) #include #else extern "C" { diff --git a/include/gridtools/fn/backend/naive.hpp b/include/gridtools/fn/backend/naive.hpp index e808872ba3..d9c5f717a9 100644 --- a/include/gridtools/fn/backend/naive.hpp +++ b/include/gridtools/fn/backend/naive.hpp @@ -11,28 +11,56 @@ #include "../../common/functional.hpp" #include "../../common/hymap.hpp" +#include "../../common/tuple_util.hpp" #include "../../sid/allocator.hpp" #include "../../sid/concept.hpp" #include "../../sid/contiguous.hpp" +#include "../../sid/multi_shift.hpp" #include "../../sid/unknown_kind.hpp" +#include "../../thread_pool/concept.hpp" +#include "../../thread_pool/omp.hpp" #include "./common.hpp" namespace gridtools::fn::backend { namespace naive_impl_ { + template struct naive {}; - template + template >> + auto make_parallel_loops(ThreadPool, Sizes const &sizes) { + return [=](auto f) { + return [=](auto ptr, auto const &strides) { + auto loop_f = [&](auto... indices) { + auto local_ptr = ptr; + sid::multi_shift(local_ptr, strides, Dims::make_values(indices...)); + f(local_ptr, strides); + }; + + tuple_util::apply( + [&](auto... sizes) { thread_pool::parallel_for_loop(ThreadPool(), loop_f, int(sizes)...); }, + sizes); + }; + }; + } + + template void apply_stencil_stage( - naive, Sizes const &sizes, StencilStage, MakeIterator &&make_iterator, Composite &&composite) { + naive, Sizes const &sizes, StencilStage, MakeIterator &&make_iterator, Composite &&composite) { auto ptr = sid::get_origin(std::forward(composite))(); auto strides = sid::get_strides(std::forward(composite)); - common::make_loops(sizes)([make_iterator = make_iterator()](auto ptr, auto const &strides) { + make_parallel_loops(ThreadPool(), sizes)([make_iterator = make_iterator()](auto ptr, auto const &strides) { StencilStage()(make_iterator, ptr, strides); })(ptr, strides); } - template - void apply_column_stage(naive, + template + void apply_column_stage(naive, Sizes const &sizes, ColumnStage, MakeIterator &&make_iterator, @@ -42,16 +70,19 @@ namespace gridtools::fn::backend { auto ptr = sid::get_origin(std::forward(composite))(); auto strides = sid::get_strides(std::forward(composite)); auto v_size = at_key(sizes); - common::make_loops(hymap::canonicalize_and_remove_key(sizes))( + make_parallel_loops(ThreadPool(), hymap::canonicalize_and_remove_key(sizes))( [v_size = std::move(v_size), make_iterator = make_iterator(), seed = std::move(seed)](auto ptr, auto const &strides) { ColumnStage()(seed, v_size, make_iterator, std::move(ptr), strides); })( ptr, strides); } - inline auto tmp_allocator(naive be) { return std::tuple(be, sid::allocator(&std::make_unique)); } + template + inline auto tmp_allocator(naive be) { + return std::tuple(be, sid::allocator(&std::make_unique)); + } - template - auto allocate_global_tmp(std::tuple &alloc, Sizes const &sizes, data_type) { + template + auto allocate_global_tmp(std::tuple, Allocator> &alloc, Sizes const &sizes, data_type) { return sid::make_contiguous(std::get<1>(alloc), sizes); } } // namespace naive_impl_ diff --git a/include/gridtools/thread_pool/omp.hpp b/include/gridtools/thread_pool/omp.hpp index b773b3c5c5..5ce8dbfeb5 100644 --- a/include/gridtools/thread_pool/omp.hpp +++ b/include/gridtools/thread_pool/omp.hpp @@ -10,14 +10,11 @@ #pragma once -#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) -#include -#endif +#include "../common/omp.hpp" namespace gridtools { namespace thread_pool { struct omp { -#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) friend auto thread_pool_get_thread_num(omp) { return omp_get_thread_num(); } friend auto thread_pool_get_max_threads(omp) { return omp_get_max_threads(); } @@ -44,7 +41,6 @@ namespace gridtools { for (I i = 0; i < i_lim; ++i) f(i, j, k); } -#endif }; } // namespace thread_pool } // namespace gridtools diff --git a/tests/include/fn_select.hpp b/tests/include/fn_select.hpp index 058a9700fe..554ee76a28 100644 --- a/tests/include/fn_select.hpp +++ b/tests/include/fn_select.hpp @@ -26,7 +26,7 @@ #endif #include namespace { - using fn_backend_t = gridtools::fn::backend::naive; + using fn_backend_t = gridtools::fn::backend::naive<>; } #elif defined(GT_FN_GPU) #ifndef GT_STENCIL_GPU @@ -57,10 +57,16 @@ namespace { namespace gridtools::fn::backend { namespace naive_impl_ { + template struct naive; - storage::cpu_kfirst backend_storage_traits(naive); - timer_dummy backend_timer_impl(naive); - inline char const *backend_name(naive const &) { return "naive"; } + template + storage::cpu_kfirst backend_storage_traits(naive); + template + timer_dummy backend_timer_impl(naive); + template + inline char const *backend_name(naive const &) { + return "naive"; + } } // namespace naive_impl_ namespace gpu_impl_ { diff --git a/tests/unit_tests/fn/test_fn_executor.cpp b/tests/unit_tests/fn/test_fn_executor.cpp index ed1bff5809..a5ff81f32a 100644 --- a/tests/unit_tests/fn/test_fn_executor.cpp +++ b/tests/unit_tests/fn/test_fn_executor.cpp @@ -47,7 +47,7 @@ namespace gridtools::fn { }; TEST(stencil_executor, smoke) { - using backend_t = backend::naive; + using backend_t = backend::naive<>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); auto alloc = tmp_allocator(backend_t()); @@ -73,7 +73,7 @@ namespace gridtools::fn { } TEST(vertical_executor, smoke) { - using backend_t = backend::naive; + using backend_t = backend::naive<>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); int a[2][3] = {}, b[2][3] = {}, c[2][3]; diff --git a/tests/unit_tests/fn/test_fn_run.cpp b/tests/unit_tests/fn/test_fn_run.cpp index 50d29b2e40..5ec5e27d67 100644 --- a/tests/unit_tests/fn/test_fn_run.cpp +++ b/tests/unit_tests/fn/test_fn_run.cpp @@ -52,7 +52,7 @@ namespace gridtools::fn { }; TEST(run, stencils) { - using backend_t = backend::naive; + using backend_t = backend::naive<>; using stages_specs_t = meta::list, stencil_stage>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); @@ -74,7 +74,7 @@ namespace gridtools::fn { } TEST(run, scans) { - using backend_t = backend::naive; + using backend_t = backend::naive<>; using stages_specs_t = meta::list, fwd_sum_scan, 1, 2>, column_stage, bwd_sum_scan, 0, 1>>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); From cf7bf2fe7085028797dfbf8b784a208e007f5fe3 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 14 Mar 2023 15:21:09 +0100 Subject: [PATCH 2/8] Enable OpenMP for fn::backend::naive if available --- cmake/public/gridtools_setup_targets.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/public/gridtools_setup_targets.cmake b/cmake/public/gridtools_setup_targets.cmake index c710ac3eaa..553f86f7cb 100644 --- a/cmake/public/gridtools_setup_targets.cmake +++ b/cmake/public/gridtools_setup_targets.cmake @@ -304,6 +304,8 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode) endif() if (OpenMP_CXX_FOUND) + target_link_libraries(${_gt_namespace}fn_naive INTERFACE OpenMP::OpenMP_CXX) + _gt_add_library(${_config_mode} stencil_cpu_kfirst) target_link_libraries(${_gt_namespace}stencil_cpu_kfirst INTERFACE ${_gt_namespace}gridtools OpenMP::OpenMP_CXX) From 4207a31301cf3fb43c764ca31ad6267ecd77aba9 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 14 Mar 2023 15:50:37 +0100 Subject: [PATCH 3/8] Fix template parameter deduction for older GCCs --- tests/unit_tests/fn/test_fn_cartesian.cpp | 4 ++-- tests/unit_tests/fn/test_fn_unstructured.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit_tests/fn/test_fn_cartesian.cpp b/tests/unit_tests/fn/test_fn_cartesian.cpp index ec89529609..71ab6ce41a 100644 --- a/tests/unit_tests/fn/test_fn_cartesian.cpp +++ b/tests/unit_tests/fn/test_fn_cartesian.cpp @@ -44,7 +44,7 @@ namespace gridtools::fn { }; auto fencil = [&](auto const &sizes, auto &out, auto const &in) { - auto be = backend::naive(); + auto be = backend::naive<>(); auto alloc = tmp_allocator(be); auto tmp = allocate_global_tmp(alloc, sizes); auto domain = cartesian_domain(std::array{sizes[0] - 1, sizes[1], sizes[2]}); @@ -80,7 +80,7 @@ namespace gridtools::fn { auto double_scan = [&](auto sizes, auto &a, auto &b, auto const &c) { auto domain = cartesian_domain(sizes); - auto backend = make_backend(backend::naive(), domain); + auto backend = make_backend(backend::naive<>(), domain); apply_double_scan(backend.vertical_executor(), a, b, c); }; diff --git a/tests/unit_tests/fn/test_fn_unstructured.cpp b/tests/unit_tests/fn/test_fn_unstructured.cpp index 1318dadfef..9ed2a3b8c3 100644 --- a/tests/unit_tests/fn/test_fn_unstructured.cpp +++ b/tests/unit_tests/fn/test_fn_unstructured.cpp @@ -45,7 +45,7 @@ namespace gridtools::fn { auto fencil = [&](auto const &v2v_table, int nvertices, int nlevels, auto &out, auto const &in) { auto v2v_conn = connectivity(v2v_table); auto domain = unstructured_domain({nvertices, nlevels}, {}, v2v_conn); - auto backend = make_backend(backend::naive(), domain); + auto backend = make_backend(backend::naive<>(), domain); apply_stencil(backend.stencil_executor(), out, in); }; @@ -77,7 +77,7 @@ namespace gridtools::fn { auto fencil = [&](auto const &v2e_table, int nvertices, int nlevels, auto &out, auto const &in) { auto v2e_conn = connectivity(v2e_table); auto domain = unstructured_domain({nvertices, nlevels}, {}, v2e_conn); - auto backend = make_backend(backend::naive(), domain); + auto backend = make_backend(backend::naive<>(), domain); apply_stencil(backend.stencil_executor(), out, in); }; From 09d87d9bdd1df09b03020db1171b674eee7d3a18 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 21 Mar 2023 08:39:35 +0100 Subject: [PATCH 4/8] Make API backward compatible --- include/gridtools/fn/backend/naive.hpp | 21 +++++++++++++------- tests/include/fn_select.hpp | 10 +++++----- tests/unit_tests/fn/test_fn_cartesian.cpp | 4 ++-- tests/unit_tests/fn/test_fn_executor.cpp | 4 ++-- tests/unit_tests/fn/test_fn_run.cpp | 4 ++-- tests/unit_tests/fn/test_fn_unstructured.cpp | 4 ++-- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/include/gridtools/fn/backend/naive.hpp b/include/gridtools/fn/backend/naive.hpp index d9c5f717a9..b18096608b 100644 --- a/include/gridtools/fn/backend/naive.hpp +++ b/include/gridtools/fn/backend/naive.hpp @@ -23,8 +23,10 @@ namespace gridtools::fn::backend { namespace naive_impl_ { - template - struct naive {}; + template + struct naive_with_threadpool {}; + + using naive = naive_with_threadpool; template >> auto make_parallel_loops(ThreadPool, Sizes const &sizes) { @@ -44,8 +46,11 @@ namespace gridtools::fn::backend { } template - void apply_stencil_stage( - naive, Sizes const &sizes, StencilStage, MakeIterator &&make_iterator, Composite &&composite) { + void apply_stencil_stage(naive_with_threadpool, + Sizes const &sizes, + StencilStage, + MakeIterator &&make_iterator, + Composite &&composite) { auto ptr = sid::get_origin(std::forward(composite))(); auto strides = sid::get_strides(std::forward(composite)); make_parallel_loops(ThreadPool(), sizes)([make_iterator = make_iterator()](auto ptr, auto const &strides) { @@ -60,7 +65,7 @@ namespace gridtools::fn::backend { class Composite, class Vertical, class Seed> - void apply_column_stage(naive, + void apply_column_stage(naive_with_threadpool, Sizes const &sizes, ColumnStage, MakeIterator &&make_iterator, @@ -77,17 +82,19 @@ namespace gridtools::fn::backend { } template - inline auto tmp_allocator(naive be) { + inline auto tmp_allocator(naive_with_threadpool be) { return std::tuple(be, sid::allocator(&std::make_unique)); } template - auto allocate_global_tmp(std::tuple, Allocator> &alloc, Sizes const &sizes, data_type) { + auto allocate_global_tmp( + std::tuple, Allocator> &alloc, Sizes const &sizes, data_type) { return sid::make_contiguous(std::get<1>(alloc), sizes); } } // namespace naive_impl_ using naive_impl_::naive; + using naive_impl_::naive_with_threadpool; using naive_impl_::apply_column_stage; using naive_impl_::apply_stencil_stage; diff --git a/tests/include/fn_select.hpp b/tests/include/fn_select.hpp index 554ee76a28..02537003ce 100644 --- a/tests/include/fn_select.hpp +++ b/tests/include/fn_select.hpp @@ -26,7 +26,7 @@ #endif #include namespace { - using fn_backend_t = gridtools::fn::backend::naive<>; + using fn_backend_t = gridtools::fn::backend::naive; } #elif defined(GT_FN_GPU) #ifndef GT_STENCIL_GPU @@ -58,13 +58,13 @@ namespace { namespace gridtools::fn::backend { namespace naive_impl_ { template - struct naive; + struct naive_with_threadpool; template - storage::cpu_kfirst backend_storage_traits(naive); + storage::cpu_kfirst backend_storage_traits(naive_with_threadpool); template - timer_dummy backend_timer_impl(naive); + timer_dummy backend_timer_impl(naive_with_threadpool); template - inline char const *backend_name(naive const &) { + inline char const *backend_name(naive_with_threadpool const &) { return "naive"; } } // namespace naive_impl_ diff --git a/tests/unit_tests/fn/test_fn_cartesian.cpp b/tests/unit_tests/fn/test_fn_cartesian.cpp index 71ab6ce41a..ec89529609 100644 --- a/tests/unit_tests/fn/test_fn_cartesian.cpp +++ b/tests/unit_tests/fn/test_fn_cartesian.cpp @@ -44,7 +44,7 @@ namespace gridtools::fn { }; auto fencil = [&](auto const &sizes, auto &out, auto const &in) { - auto be = backend::naive<>(); + auto be = backend::naive(); auto alloc = tmp_allocator(be); auto tmp = allocate_global_tmp(alloc, sizes); auto domain = cartesian_domain(std::array{sizes[0] - 1, sizes[1], sizes[2]}); @@ -80,7 +80,7 @@ namespace gridtools::fn { auto double_scan = [&](auto sizes, auto &a, auto &b, auto const &c) { auto domain = cartesian_domain(sizes); - auto backend = make_backend(backend::naive<>(), domain); + auto backend = make_backend(backend::naive(), domain); apply_double_scan(backend.vertical_executor(), a, b, c); }; diff --git a/tests/unit_tests/fn/test_fn_executor.cpp b/tests/unit_tests/fn/test_fn_executor.cpp index a5ff81f32a..ed1bff5809 100644 --- a/tests/unit_tests/fn/test_fn_executor.cpp +++ b/tests/unit_tests/fn/test_fn_executor.cpp @@ -47,7 +47,7 @@ namespace gridtools::fn { }; TEST(stencil_executor, smoke) { - using backend_t = backend::naive<>; + using backend_t = backend::naive; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); auto alloc = tmp_allocator(backend_t()); @@ -73,7 +73,7 @@ namespace gridtools::fn { } TEST(vertical_executor, smoke) { - using backend_t = backend::naive<>; + using backend_t = backend::naive; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); int a[2][3] = {}, b[2][3] = {}, c[2][3]; diff --git a/tests/unit_tests/fn/test_fn_run.cpp b/tests/unit_tests/fn/test_fn_run.cpp index 5ec5e27d67..50d29b2e40 100644 --- a/tests/unit_tests/fn/test_fn_run.cpp +++ b/tests/unit_tests/fn/test_fn_run.cpp @@ -52,7 +52,7 @@ namespace gridtools::fn { }; TEST(run, stencils) { - using backend_t = backend::naive<>; + using backend_t = backend::naive; using stages_specs_t = meta::list, stencil_stage>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); @@ -74,7 +74,7 @@ namespace gridtools::fn { } TEST(run, scans) { - using backend_t = backend::naive<>; + using backend_t = backend::naive; using stages_specs_t = meta::list, fwd_sum_scan, 1, 2>, column_stage, bwd_sum_scan, 0, 1>>; auto domain = hymap::keys, int_t<1>>::make_values(2_c, 3_c); diff --git a/tests/unit_tests/fn/test_fn_unstructured.cpp b/tests/unit_tests/fn/test_fn_unstructured.cpp index 9ed2a3b8c3..1318dadfef 100644 --- a/tests/unit_tests/fn/test_fn_unstructured.cpp +++ b/tests/unit_tests/fn/test_fn_unstructured.cpp @@ -45,7 +45,7 @@ namespace gridtools::fn { auto fencil = [&](auto const &v2v_table, int nvertices, int nlevels, auto &out, auto const &in) { auto v2v_conn = connectivity(v2v_table); auto domain = unstructured_domain({nvertices, nlevels}, {}, v2v_conn); - auto backend = make_backend(backend::naive<>(), domain); + auto backend = make_backend(backend::naive(), domain); apply_stencil(backend.stencil_executor(), out, in); }; @@ -77,7 +77,7 @@ namespace gridtools::fn { auto fencil = [&](auto const &v2e_table, int nvertices, int nlevels, auto &out, auto const &in) { auto v2e_conn = connectivity(v2e_table); auto domain = unstructured_domain({nvertices, nlevels}, {}, v2e_conn); - auto backend = make_backend(backend::naive<>(), domain); + auto backend = make_backend(backend::naive(), domain); apply_stencil(backend.stencil_executor(), out, in); }; From 02fabcaf4c81bb53308bec69cd474be2258bbed9 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 21 Mar 2023 08:50:02 +0100 Subject: [PATCH 5/8] Introduce dummy thread pool --- include/gridtools/fn/backend/naive.hpp | 4 +-- include/gridtools/thread_pool/dummy.hpp | 41 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 include/gridtools/thread_pool/dummy.hpp diff --git a/include/gridtools/fn/backend/naive.hpp b/include/gridtools/fn/backend/naive.hpp index b18096608b..0aa0a3353d 100644 --- a/include/gridtools/fn/backend/naive.hpp +++ b/include/gridtools/fn/backend/naive.hpp @@ -18,7 +18,7 @@ #include "../../sid/multi_shift.hpp" #include "../../sid/unknown_kind.hpp" #include "../../thread_pool/concept.hpp" -#include "../../thread_pool/omp.hpp" +#include "../../thread_pool/dummy.hpp" #include "./common.hpp" namespace gridtools::fn::backend { @@ -26,7 +26,7 @@ namespace gridtools::fn::backend { template struct naive_with_threadpool {}; - using naive = naive_with_threadpool; + using naive = naive_with_threadpool; template >> auto make_parallel_loops(ThreadPool, Sizes const &sizes) { diff --git a/include/gridtools/thread_pool/dummy.hpp b/include/gridtools/thread_pool/dummy.hpp new file mode 100644 index 0000000000..a0b12cb1e0 --- /dev/null +++ b/include/gridtools/thread_pool/dummy.hpp @@ -0,0 +1,41 @@ +/* + * GridTools + * + * Copyright (c) 2014-2021, ETH Zurich + * All rights reserved. + * + * Please, refer to the LICENSE file in the root directory. + * SPDX-License-Identifier: BSD-3-Clause + */ + +#pragma once + +namespace gridtools { + namespace thread_pool { + struct dummy { + friend auto thread_pool_get_thread_num(dummy) { return 0; } + friend auto thread_pool_get_max_threads(dummy) { return 1; } + + template + friend void thread_pool_parallel_for_loop(dummy, F const &f, I lim) { + for (I i = 0; i < lim; ++i) + f(i); + } + + template + friend void thread_pool_parallel_for_loop(dummy, F const &f, I i_lim, J j_lim) { + for (J j = 0; j < j_lim; ++j) + for (I i = 0; i < i_lim; ++i) + f(i, j); + } + + template + friend void thread_pool_parallel_for_loop(dummy, F const &f, I i_lim, J j_lim, K k_lim) { + for (K k = 0; k < k_lim; ++k) + for (J j = 0; j < j_lim; ++j) + for (I i = 0; i < i_lim; ++i) + f(i, j, k); + } + }; + } // namespace thread_pool +} // namespace gridtools From 00b4cb9370f93aa69870d4e602a2b89e2feeacd5 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 21 Mar 2023 09:20:40 +0100 Subject: [PATCH 6/8] Revert changes to OpenMP thread pool --- include/gridtools/common/omp.hpp | 2 +- include/gridtools/thread_pool/omp.hpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/gridtools/common/omp.hpp b/include/gridtools/common/omp.hpp index f1f6470030..9e3d95b390 100644 --- a/include/gridtools/common/omp.hpp +++ b/include/gridtools/common/omp.hpp @@ -9,7 +9,7 @@ */ #pragma once -#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) +#ifdef _OPENMP #include #else extern "C" { diff --git a/include/gridtools/thread_pool/omp.hpp b/include/gridtools/thread_pool/omp.hpp index 5ce8dbfeb5..b773b3c5c5 100644 --- a/include/gridtools/thread_pool/omp.hpp +++ b/include/gridtools/thread_pool/omp.hpp @@ -10,11 +10,14 @@ #pragma once -#include "../common/omp.hpp" +#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) +#include +#endif namespace gridtools { namespace thread_pool { struct omp { +#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) friend auto thread_pool_get_thread_num(omp) { return omp_get_thread_num(); } friend auto thread_pool_get_max_threads(omp) { return omp_get_max_threads(); } @@ -41,6 +44,7 @@ namespace gridtools { for (I i = 0; i < i_lim; ++i) f(i, j, k); } +#endif }; } // namespace thread_pool } // namespace gridtools From 328881b090052ea7ee925e49234aa4cc9e9960aa Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 21 Mar 2023 09:23:55 +0100 Subject: [PATCH 7/8] Enable OpenMP thread pool in fn::backend::naive if available --- include/gridtools/fn/backend/naive.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/gridtools/fn/backend/naive.hpp b/include/gridtools/fn/backend/naive.hpp index 0aa0a3353d..d8ac7050af 100644 --- a/include/gridtools/fn/backend/naive.hpp +++ b/include/gridtools/fn/backend/naive.hpp @@ -19,6 +19,7 @@ #include "../../sid/unknown_kind.hpp" #include "../../thread_pool/concept.hpp" #include "../../thread_pool/dummy.hpp" +#include "../../thread_pool/omp.hpp" #include "./common.hpp" namespace gridtools::fn::backend { @@ -26,7 +27,13 @@ namespace gridtools::fn::backend { template struct naive_with_threadpool {}; - using naive = naive_with_threadpool; + using naive = naive_with_threadpool< +#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND) + thread_pool::omp +#else + thread_pool::dummy +#endif + >; template >> auto make_parallel_loops(ThreadPool, Sizes const &sizes) { From 1ee21e6530dd84539a4cfd9727d4821e57630637 Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Tue, 21 Mar 2023 11:24:46 +0100 Subject: [PATCH 8/8] Fix older NVCC compilation --- include/gridtools/fn/backend/naive.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gridtools/fn/backend/naive.hpp b/include/gridtools/fn/backend/naive.hpp index d8ac7050af..df8618ccae 100644 --- a/include/gridtools/fn/backend/naive.hpp +++ b/include/gridtools/fn/backend/naive.hpp @@ -90,7 +90,7 @@ namespace gridtools::fn::backend { template inline auto tmp_allocator(naive_with_threadpool be) { - return std::tuple(be, sid::allocator(&std::make_unique)); + return std::make_tuple(be, sid::allocator(&std::make_unique)); } template