8000 Half preconditioner, multigrid, log, and reorder by yhmtsai · Pull Request #1713 · ginkgo-project/ginkgo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Half preconditioner, multigrid, log, and reorder #1713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@ set(gko_test_option_args "NO_RESOURCES;NO_GTEST_MAIN")

## Replaces / by _ to create valid target names from relative paths
function(ginkgo_build_test_name test_name target_name)
cmake_parse_arguments(PARSE_ARGV 2 build_test_name "" "${gko_test_single_args}" "")
file(RELATIVE_PATH REL_BINARY_DIR
${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}")
set(test_binary_name ${test_name})
if (build_test_name_EXECUTABLE_NAME)
set(test_binary_name ${build_test_name_EXECUTABLE_NAME})
endif()
string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_binary_name}")
set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE)
endfunction()

Expand Down Expand Up @@ -127,7 +132,7 @@ endfunction()

## Normal test
function(ginkgo_create_test test_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
add_executable(${test_target_name} ${test_name}.cpp)
target_link_libraries(${test_target_name})
ginkgo_set_test_target_properties(${test_target_name} "_cpu" ${ARGN})
Expand All @@ -136,7 +141,7 @@ endfunction(ginkgo_create_test)

## Test compiled with dpcpp
function(ginkgo_create_dpcpp_test test_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
add_executable(${test_target_name} ${test_name}.dp.cpp)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS})
gko_add_sycl_to_target(TARGET ${test_target_name} SOURCES ${test_name}.dp.cpp)
Expand All @@ -151,7 +156,7 @@ endfunction(ginkgo_create_dpcpp_test)

## Test compiled with CUDA
function(ginkgo_create_cuda_test test_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
ginkgo_create_cuda_test_internal(${test_name} ${test_name}.cu ${test_target_name} ${ARGN})
endfunction(ginkgo_create_cuda_test)

Expand All @@ -177,7 +182,7 @@ endfunction(ginkgo_create_cuda_test_internal)

## Test compiled with HIP
function(ginkgo_create_hip_test test_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
ginkgo_create_hip_test_internal(${test_name} ${test_name}.hip.cpp ${test_target_name} ${ARGN})
endfunction(ginkgo_create_hip_test)

Expand All @@ -196,12 +201,12 @@ endfunction(ginkgo_create_hip_test_internal)

## Test compiled with OpenMP
function(ginkgo_create_omp_test test_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
ginkgo_create_omp_test_internal(${test_name} ${test_name}.cpp ${test_target_name} "" ${ARGN})
endfunction()

functi 5670 on(ginkgo_create_omp_test_internal test_name filename test_target_name)
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
add_executable(${test_target_name} ${test_name}.cpp)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP GKO_DEVICE_NAMESPACE=omp)
target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX)
Expand Down Expand Up @@ -241,7 +246,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec)
else ()
set(test_resource_type sycl)
endif ()
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
string(TOUPPER ${exec} exec_upper)

# set up actual test
Expand All @@ -267,7 +272,7 @@ endfunction(ginkgo_create_common_test_internal)
## Common test compiled with the device compiler, one target for each enabled backend
function(ginkgo_create_common_device_test test_name)
cmake_parse_arguments(PARSE_ARGV 1 common_device_test "" "${gko_test_single_args}" "${gko_test_multi_args}")
ginkgo_build_test_name(${test_name} test_target_name)
ginkgo_build_test_name(${test_name} test_target_name ${ARGN})
if(GINKGO_BUILD_SYCL)
ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN})
target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS})
Expand Down
6 changes: 6 additions & 0 deletions common/cuda_hip/base/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ struct truncate_type_impl<thrust::complex<T>> {
};


template <typename T>
struct type_size_impl<thrust::complex<T>> {
static constexpr auto value = sizeof(T) * byte_size;
};


template <typename T>
struct is_complex_impl<thrust::complex<T>> : public std::true_type {};

Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/components/warp_blas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ __device__ __forceinline__ remove_complex<ValueType> compute_infinity_norm(
}
}
return reduce(group, sum,
[](result_type x, result_type y) { return max(x, y); });
[](result_type x, result_type y) { return gko::max(x, y); });
}


Expand Down
5 changes: 3 additions & 2 deletions common/cuda_hip/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec, size_type nnz,
thrust::sort_by_key(thrust_policy(exec), it, it + nnz, vals_it);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_PGM_SORT_ROW_MAJOR);


template <typename ValueType, typename IndexType>
Expand All @@ -78,7 +79,7 @@ void compute_coarse_coo(std::shared_ptr<const DefaultExecutor> exec,
vals_it, coarse_key_it, coarse_vals_it);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_PGM_COMPUTE_COARSE_COO);


Expand Down
10 changes: 5 additions & 5 deletions common/cuda_hip/preconditioner/isai_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ void generate_tri_inverse(std::shared_ptr<const DefaultExecutor> exec,
components::prefix_sum_nonnegative(exec, excess_nz_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL);


Expand Down Expand Up @@ -516,7 +516,7 @@ void generate_general_inverse(std::shared_ptr<const DefaultExecutor> exec,
components::prefix_sum_nonnegative(exec, excess_nz_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL);


Expand Down Expand Up @@ -548,7 +548,7 @@ void generate_excess_system(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL);


Expand All @@ -568,7 +568,7 @@ void scale_excess_solution(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL);


Expand All @@ -593,7 +593,7 @@ void scatter_excess_solution(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ void apply(std::shared_ptr<const DefaultExecutor> exec, size_type num_blocks,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_APPLY_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_APPLY_KERNEL);


} // namespace jacobi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void advanced_apply(
const preconditioner::block_interleaved_storage_scheme<IndexType>&, \
const ValueType*, const ValueType*, size_type, ValueType*, size_type)

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
DECLARE_JACOBI_ADVANCED_APPLY_INSTANTIATION);


Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/preconditioner/jacobi_generate_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void generate(std::shared_ptr<const DefaultExecutor> exec,
block_pointers.get_const_data(), num_blocks);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_GENERATE_KERNEL);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ void generate(syn::value_list<int, max_block_size>,
remove_complex<ValueType>*, precision_reduction*, const IndexType*, \
size_type)

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
DECLARE_JACOBI_GENERATE_INSTANTIATION);


Expand Down
8 changes: 4 additions & 4 deletions common/cuda_hip/preconditioner/jacobi_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ void find_blocks(std::shared_ptr<const DefaultExecutor> exec,
exec, max_block_size, num_natural_blocks, block_pointers.get_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL);


Expand Down Expand Up @@ -364,7 +364,7 @@ void transpose_jacobi(
storage_scheme, out_blocks.get_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL);


Expand All @@ -388,7 +388,7 @@ void conj_transpose_jacobi(
storage_scheme, out_blocks.get_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


Expand All @@ -401,7 +401,7 @@ void convert_to_dense(
storage_scheme,
ValueType* result_values, size_type result_stride) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void simple_apply(
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ void apply(syn::value_list<int, max_block_size>,
const preconditioner::block_interleaved_storage_scheme<IndexType>&, \
const ValueType*, size_type, ValueType*, size_type)

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
DECLARE_JACOBI_SIMPLE_APPLY_INSTANTIATION);


Expand Down
17 changes: 9 additions & 8 deletions common/cuda_hip/preconditioner/sor_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void initialize_weighted_l(
const auto grid_dim = static_cast<uint32>(
ceildiv(num_rows, static_cast<size_type>(block_size)));

auto inv_weight = one(weight) / weight;
auto inv_weight = as_device_type(one(weight) / weight);

if (grid_dim > 0) {
using namespace gko::factorization;
Expand All @@ -46,7 +46,7 @@ void initialize_weighted_l(
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L);


Expand All @@ -62,9 +62,10 @@ void initialize_weighted_l_u(
const auto grid_dim = static_cast<uint32>(
ceildiv(num_rows, static_cast<size_type>(block_size)));

auto inv_weight = one(weight) / weight;
auto inv_two_minus_weight =
one(weight) / (static_cast<remove_complex<ValueType>>(2.0) - weight);
auto inv_weight = as_device_type(one(weight) / weight);
auto inv_two_minus_weight = as_device_type(
one(weight) / (static_cast<remove_complex<ValueType>>(2.0) - weight));
auto d_weight = as_device_type(weight);

if (grid_dim > 0) {
using namespace gko::factorization;
Expand All @@ -87,13 +88,13 @@ void initialize_weighted_l_u(
[inv_two_minus_weight] __device__(auto val) {
return val * inv_two_minus_weight;
},
[weight, inv_two_minus_weight] __device__(auto val) {
return val * weight * inv_two_minus_weight;
[d_weight, inv_two_minus_weight] __device__(auto val) {
return val * d_weight * inv_two_minus_weight;
}));
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_SOR_INITIALIZE_WEIGHTED_L_U);


Expand Down
12 changes: 7 additions & 5 deletions common/cuda_hip/solver/multigrid_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ void kcycle_step_1(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_WITH_HALF(
GKO_DECLARE_MULTIGRID_KCYCLE_STEP_1_KERNEL);


template <typename ValueType>
Expand Down Expand Up @@ -174,7 +175,8 @@ void kcycle_step_2(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_WITH_HALF(
GKO_DECLARE_MULTIGRID_KCYCLE_STEP_2_KERNEL);


template <typename ValueType>
Expand All @@ -192,13 +194,13 @@ void kcycle_check_stop(std::shared_ptr<const DefaultExecutor> exec,
kernel::kcycle_check_stop_kernel<<<grid, default_block_size, 0,
exec->get_stream()>>>(
nrhs, as_device_type(old_norm->get_const_values()),
as_device_type(new_norm->get_const_values()), rel_tol,
as_device_type(dis_stop.get_data()));
as_device_type(new_norm->get_const_values()),
as_device_type(rel_tol), as_device_type(dis_stop.get_data()));
}
is_stop = get_element(dis_stop, 0);
}

GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE_WITH_HALF(
GKO_DECLARE_MULTIGRID_KCYCLE_CHECK_STOP_KERNEL);


Expand Down
14 changes: 7 additions & 7 deletions common/unified/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ void find_strongest_neighbor(
continue;
}
auto weight =
weight_vals[idx] / max(abs(diag[row]), abs(diag[col]));
weight_vals[idx] / gko::max(abs(diag[row]), abs(diag[col]));
if (agg[col] == -1 &&
device_std::tie(weight, col) >
device_std::tie(max_weight_unagg, strongest_unagg)) {
Expand Down Expand Up @@ -217,7 +217,7 @@ void find_strongest_neighbor(
strongest_neighbor.get_data());
}

GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_PGM_FIND_STRONGEST_NEIGHBOR);

template <typename ValueType, typename IndexType>
Expand Down Expand Up @@ -246,8 +246,8 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
if (col == row) {
continue;
}
auto weight =
weight_vals[idx] / max(abs(diag[row]), abs(diag[col]));
auto weight = weight_vals[idx] /
gko::max(abs(diag[row]), abs(diag[col]));
if (agg_const_val[col] != -1 &&
device_std::tie(weight, col) >
device_std::tie(max_weight_agg, strongest_agg)) {
Expand Down Expand Up @@ -284,8 +284,8 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
if (col == row) {
continue;
}
auto weight =
weight_vals[idx] / max(abs(diag[row]), abs(diag[col]));
auto weight = weight_vals[idx] /
gko::max(abs(diag[row]), abs(diag[col]));
if (agg_val[col] != -1 &&
device_std::tie(weight, col) >
device_std::tie(max_weight_agg, strongest_agg)) {
Expand All @@ -305,7 +305,7 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
}
}

GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE_WITH_HALF(
GKO_DECLARE_PGM_ASSIGN_TO_EXIST_AGG);


Expand Down
Loading
Loading
0