Parallel fn::backend::naive by fthaler · Pull Request #1746 · GridTools/gridtools · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Parallel fn::backend::naive #1746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/public/gridtools_setup_targets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode)
endif()

if (OpenMP_CXX_FOUND)
target_link_libraries(${_gt_namespace}fn_naive INTERFACE OpenMP::OpenMP_CXX)

_gt_add_library(${_config_mode} stencil_cpu_kfirst)
target_link_libraries(${_gt_namespace}stencil_cpu_kfirst INTERFACE ${_gt_namespace}gridtools OpenMP::OpenMP_CXX)

Expand Down
67 changes: 56 additions & 11 deletions include/gridtools/fn/backend/naive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,68 @@

#include "../../common/functional.hpp"
#include "../../common/hymap.hpp"
#include "../../common/tuple_util.hpp"
#include "../../sid/allocator.hpp"
#include "../../sid/concept.hpp"
#include "../../sid/contiguous.hpp"
#include "../../sid/multi_shift.hpp"
#include "../../sid/unknown_kind.hpp"
#include "../../thread_pool/concept.hpp"
#include "../../thread_pool/dummy.hpp"
#include "../../thread_pool/omp.hpp"
#include "./common.hpp"

namespace gridtools::fn::backend {
namespace naive_impl_ {
struct naive {};
// Empty tag type identifying the naive backend. The ThreadPool parameter is
// the thread pool type that the backend overloads below default-construct
// and hand to thread_pool::parallel_for_loop.
template <class ThreadPool>
struct naive_with_threadpool {};

template <class Sizes, class StencilStage, class MakeIterator, class Composite>
void apply_stencil_stage(
naive, Sizes const &sizes, StencilStage, MakeIterator &&make_iterator, Composite &&composite) {
// Default naive backend: naive_with_threadpool bound to thread_pool::omp when
// either _OPENMP or GT_HIP_OPENMP_WORKAROUND is defined, and to
// thread_pool::dummy otherwise.
using naive = naive_with_threadpool<
#if defined(_OPENMP) || defined(GT_HIP_OPENMP_WORKAROUND)
thread_pool::omp
#else
thread_pool::dummy
#endif
>;

/**
 * Builds a loop-nest factory driven by a thread pool.
 *
 * The returned callable takes a body `f` and yields a callable of
 * `(ptr, strides)`. Invoking that callable hands one extent per entry of
 * `sizes` (converted to `int`) to `thread_pool::parallel_for_loop`; for every
 * index combination it copies `ptr`, shifts the copy by the indices keyed with
 * `Dims`, and calls `f(shifted_copy, strides)`.
 *
 * `sizes` and `f` are captured by copy, so the returned callables do not refer
 * back to the arguments of this function.
 */
template <class ThreadPool, class Sizes, class Dims = meta::rename<hymap::keys, get_keys<Sizes>>>
auto make_parallel_loops(ThreadPool, Sizes const &sizes) {
    return [sizes](auto f) {
        return [sizes, f](auto ptr, auto const &strides) {
            // Executed once per index tuple: works on a private copy of the origin pointer.
            auto invoke_at = [&](auto... offsets) {
                auto shifted = ptr;
                sid::multi_shift(shifted, strides, Dims::make_values(offsets...));
                f(shifted, strides);
            };

            // Unpack the sizes into individual loop extents for the thread pool.
            tuple_util::apply(
                [&](auto... extents) {
                    thread_pool::parallel_for_loop(ThreadPool(), invoke_at, int(extents)...);
                },
                sizes);
        };
    };
}

template <class ThreadPool, class Sizes, class StencilStage, class MakeIterator, class Composite>
void apply_stencil_stage(naive_with_threadpool<ThreadPool>,
Sizes const &sizes,
StencilStage,
MakeIterator &&make_iterator,
Composite &&composite) {
auto ptr = sid::get_origin(std::forward<Composite>(composite))();
auto strides = sid::get_strides(std::forward<Composite>(composite));
common::make_loops(sizes)([make_iterator = make_iterator()](auto ptr, auto const &strides) {
make_parallel_loops(ThreadPool(), sizes)([make_iterator = make_iterator()](auto ptr, auto const &strides) {
StencilStage()(make_iterator, ptr, strides);
})(ptr, strides);
}

template <class Sizes, class ColumnStage, class MakeIterator, class Composite, class Vertical, class Seed>
void apply_column_stage(naive,
template <class ThreadPool,
class Sizes,
class ColumnStage,
class MakeIterator,
class Composite,
class Vertical,
class Seed>
void apply_column_stage(naive_with_threadpool<ThreadPool>,
Sizes const &sizes,
ColumnStage,
MakeIterator &&make_iterator,
Expand All @@ -42,21 +82,26 @@ namespace gridtools::fn::backend {
auto ptr = sid::get_origin(std::forward<Composite>(composite))();
auto strides = sid::get_strides(std::forward<Composite>(composite));
auto v_size = at_key<Vertical>(sizes);
common::make_loops(hymap::canonicalize_and_remove_key<Vertical>(sizes))(
make_parallel_loops(ThreadPool(), hymap::canonicalize_and_remove_key<Vertical>(sizes))(
[v_size = std::move(v_size), make_iterator = make_iterator(), seed = std::move(seed)](auto ptr,
auto const &strides) { ColumnStage()(seed, v_size, make_iterator, std::move(ptr), strides); })(
ptr, strides);
}

inline auto tmp_allocator(naive be) { return std::tuple(be, sid::allocator(&std::make_unique<char[]>)); }
/**
 * Creates the temporary-allocator state for the naive backend: a tuple holding
 * the backend tag `be` and a `sid::allocator` built from
 * `&std::make_unique<char[]>`.
 */
template <class ThreadPool>
inline auto tmp_allocator(naive_with_threadpool<ThreadPool> be) {
    auto byte_allocator = sid::allocator(&std::make_unique<char[]>);
    return std::make_tuple(be, std::move(byte_allocator));
}

template <class Allocator, class Sizes, class T>
auto allocate_global_tmp(std::tuple<naive, Allocator> &alloc, Sizes const &sizes, data_type<T>) {
/**
 * Allocates a global temporary with element type `T` and extents `sizes`.
 *
 * The allocator is the second element of the tuple `alloc`; the storage is
 * created through `sid::make_contiguous<T, int_t, sid::unknown_kind>`.
 */
template <class ThreadPool, class Allocator, class Sizes, class T>
auto allocate_global_tmp(
    std::tuple<naive_with_threadpool<ThreadPool>, Allocator> &alloc, Sizes const &sizes, data_type<T>) {
    auto &allocator = std::get<1>(alloc);
    return sid::make_contiguous<T, int_t, sid::unknown_kind>(allocator, sizes);
}
} // namespace naive_impl_

using naive_impl_::naive;
using naive_impl_::naive_with_threadpool;

using naive_impl_::apply_column_stage;
using naive_impl_::apply_stencil_stage;
Expand Down
41 changes: 41 additions & 0 deletions include/gridtools/thread_pool/dummy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* GridTools
*
* Copyright (c) 2014-2021, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/

#pragma once

namespace gridtools::thread_pool {
    /**
     * Thread pool stand-in that runs every loop serially in the caller.
     *
     * All operations are hidden friends and are found through argument-dependent
     * lookup on a `dummy` value.
     */
    struct dummy {
        /// Index of the current thread; always 0.
        friend auto thread_pool_get_thread_num(dummy) { return 0; }

        /// Upper bound on the number of threads; always 1.
        friend auto thread_pool_get_max_threads(dummy) { return 1; }

        /// Calls `body(i)` for i = 0 .. extent-1 in increasing order.
        template <class F, class I>
        friend void thread_pool_parallel_for_loop(dummy, const F &body, I extent) {
            for (I i = 0; i < extent; ++i) {
                body(i);
            }
        }

        /// Calls `body(i, j)` over the 2D index space; `i` is the innermost (fastest) index.
        template <class F, class I, class J>
        friend void thread_pool_parallel_for_loop(dummy, const F &body, I i_extent, J j_extent) {
            for (J j = 0; j < j_extent; ++j) {
                for (I i = 0; i < i_extent; ++i) {
                    body(i, j);
                }
            }
        }

        /// Calls `body(i, j, k)` over the 3D index space; `k` is outermost, `i` innermost.
        template <class F, class I, class J, class K>
        friend void thread_pool_parallel_for_loop(dummy, const F &body, I i_extent, J j_extent, K k_extent) {
            for (K k = 0; k < k_extent; ++k) {
                for (J j = 0; j < j_extent; ++j) {
                    for (I i = 0; i < i_extent; ++i) {
                        body(i, j, k);
                    }
                }
            }
        }
    };
} // namespace gridtools::thread_pool
14 changes: 10 additions & 4 deletions tests/include/fn_select.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,16 @@ namespace {

namespace gridtools::fn::backend {
namespace naive_impl_ {
struct naive;
storage::cpu_kfirst backend_storage_traits(naive);
timer_dummy backend_timer_impl(naive);
inline char const *backend_name(naive const &) { return "naive"; }
// Forward declaration of the naive backend tag; its definition is not part of this header.
template <class ThreadPool>
struct naive_with_threadpool;
// Storage traits for the naive backend with any thread pool: storage::cpu_kfirst.
// Declared without a body here.
template <class ThreadPool>
storage::cpu_kfirst backend_storage_traits(naive_with_threadpool<ThreadPool>);
// Timer implementation for the naive backend with any thread pool: timer_dummy.
// Declared without a body here.
template <class ThreadPool>
timer_dummy backend_timer_impl(naive_with_threadpool<ThreadPool>);
// Name reported for the naive backend; the same string for every thread pool type.
template <class ThreadPool>
inline char const *backend_name(naive_with_threadpool<ThreadPool> const &) {
return "naive";
}
} // namespace naive_impl_

namespace gpu_impl_ {
Expand Down
0