8000 add oneMKL csr and optimized csr to benchmark by yhmtsai · Pull Request #1057 · ginkgo-project/ginkgo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

add oneMKL csr and optimized csr to benchmark #1057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ function(ginkgo_benchmark_hipsparse_linops type def)
target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES})
endfunction()

# Builds the oneMKL sparse-linop wrapper library for one benchmark precision.
# type: single-letter precision suffix (d/s/z/c) used in the target name;
# def:  precision macro forwarded to consumers of the library.
function(ginkgo_benchmark_onemkl_linops type def)
    add_library(onemkl_linops_${type} utils/dpcpp_linops.dp.cpp)
    # make the dependency public to catch issues
    target_compile_definitions(onemkl_linops_${type} PUBLIC ${def})
    # oneMKL DPC++ interfaces require C++17; PRIVATE since only this TU needs it
    target_compile_features(onemkl_linops_${type} PRIVATE cxx_std_17)
    target_link_libraries(onemkl_linops_${type} PRIVATE Ginkgo::ginkgo MKL::MKL_DPCPP)
endfunction()


# Generates an executable for one precision. Each executable will be linked to
# `ginkgo`, `gflags` and `rapidjson`.
Expand Down Expand Up @@ -78,6 +86,10 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty
target_compile_definitions("${name}" PRIVATE HAS_HIP=1)
target_link_libraries("${name}" hipsparse_linops_${type})
endif()
# When the DPC++ backend is enabled, expose HAS_DPCPP to the benchmark
# sources and link the matching oneMKL wrapper library for this precision.
if (GINKGO_BUILD_DPCPP)
    target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1)
    target_link_libraries("${name}" onemkl_linops_${type})
endif()
endif()
endfunction(ginkgo_add_single_benchmark_executable)

Expand Down Expand Up @@ -127,6 +139,12 @@ if (GINKGO_BUILD_HIP)
target_link_libraries(hip_timer ginkgo)
endif()

# Build one oneMKL wrapper library per benchmarked precision:
# d = double, s = single, z = double complex, c = single complex.
if (GINKGO_BUILD_DPCPP)
    ginkgo_benchmark_onemkl_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
    ginkgo_benchmark_onemkl_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION)
    ginkgo_benchmark_onemkl_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)
    ginkgo_benchmark_onemkl_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)
endif()

add_subdirectory(blas)
add_subdirectory(conversions)
Expand Down
24 changes: 11 additions & 13 deletions benchmark/utils/cuda_linops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,16 @@ public:
// uses gko::array to allocate buffer.
std::shared_ptr<const gko::CudaExecutor> get_gpu_exec() const
{
return gpu_exec_;
return std::dynamic_pointer_cast<const gko::CudaExecutor>(
this->get_executor());
}

protected:
CusparseBase(std::shared_ptr<const gko::Executor> exec,
const gko::dim<2>& size = gko::dim<2>{})
: gko::LinOp(exec, size)
{
gpu_exec_ = std::dynamic_pointer_cast<const gko::CudaExecutor>(exec);
if (gpu_exec_ == nullptr) {
if (this->get_gpu_exec() == nullptr) {
GKO_NOT_IMPLEMENTED;
}
this->initialize_descr();
Expand All @@ -95,15 +95,14 @@ protected:
{
if (this != &other) {
gko::LinOp::operator=(other);
this->gpu_exec_ = other.gpu_exec_;
this->initialize_descr();
}
return *this;
}

void initialize_descr()
{
const auto id = this->gpu_exec_->get_device_id();
const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
this->descr_ = handle_manager<cusparseMatDescr>(
gko::kernels::cuda::cusparse::create_mat_descr(),
Expand All @@ -114,7 +113,6 @@ protected:
}

private:
std::shared_ptr<const gko::CudaExecutor> gpu_exec_;
template <typename T>
using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
handle_manager<cusparseMatDescr> descr_;
Expand Down Expand Up @@ -152,7 +150,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -227,7 +225,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -303,7 +301,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -383,7 +381,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -492,7 +490,7 @@ public:
auto t_csr = csr::create(this->get_executor(),
std::make_shared<typename csr::classical>());
t_csr->read(data);
this->set_size(gko::dim<2>{t_csr->get_size()});
this->set_size(t_csr->get_size());

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
Expand Down Expand Up @@ -638,7 +636,7 @@ public:
{
using gko::kernels::cuda::as_culibs_type;
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseCreateCsr(&mat_, csr_->get_size()[0], csr_->get_size()[1],
csr_->get_num_stored_elements(),
Expand Down Expand Up @@ -731,7 +729,7 @@ public:
{
using gko::kernels::cuda::as_culibs_type;
coo_->read(data);
this->set_size(gko::dim<2>{coo_->get_size()});
this->set_size(coo_->get_size());
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseCreateCoo(&mat_, coo_->get_size()[0], coo_->get_size()[1],
coo_->get_num_stored_elements(),
Expand Down
191 changes: 191 additions & 0 deletions benchmark/utils/dpcpp_linops.dp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <ginkgo/ginkgo.hpp>


#include <memory>


#include <oneapi/mkl.hpp>


#include "benchmark/utils/sparselib_linops.hpp"
#include "benchmark/utils/types.hpp"


// Empty tag types used to select the oneMKL SpMV implementation in the
// IMPL_CREATE_SPARSELIB_LINOP registrations at the bottom of this file.
class onemkl_csr {};
class onemkl_optimized_csr {};


namespace detail {


/// Creates and initializes a fresh oneMKL sparse matrix handle.
/// The caller takes ownership and must release it with
/// oneapi::mkl::sparse::release_matrix_handle.
inline oneapi::mkl::sparse::matrix_handle_t create_mat_handle()
{
    oneapi::mkl::sparse::matrix_handle_t handle;
    oneapi::mkl::sparse::init_matrix_handle(&handle);
    return handle;
}


// Common base for the oneMKL benchmark wrappers: ties the lifetime of a
// oneMKL sparse matrix handle to a gko::LinOp and provides access to the
// DPC++ executor.
class OnemklBase : public gko::LinOp {
public:
    // Raw oneMKL matrix handle owned by this object (do not release it).
    oneapi::mkl::sparse::matrix_handle_t get_mat_handle() const
    {
        return this->mat_handle_.get();
    }

    // Executor downcast to DpcppExecutor; nullptr if the stored executor
    // is not a DPC++ executor.
    std::shared_ptr<const gko::DpcppExecutor> get_device_exec() const
    {
        return std::dynamic_pointer_cast<const gko::DpcppExecutor>(
            this->get_executor());
    }

protected:
    // The advanced (alpha/beta) apply is not supported by these wrappers.
    void apply_impl(const gko::LinOp*, const gko::LinOp*, const gko::LinOp*,
                    gko::LinOp*) const override
    {
        GKO_NOT_IMPLEMENTED;
    }

    // (Re)creates the matrix handle; the deleter releases it through oneMKL
    // when the wrapper is destroyed or re-initialized (RAII).
    void initialize_mat_handle()
    {
        mat_handle_ = handle_manager<oneapi::mkl::sparse::matrix_handle>(
            create_mat_handle(),
            [](oneapi::mkl::sparse::matrix_handle_t mat_handle) {
                oneapi::mkl::sparse::release_matrix_handle(&mat_handle);
            });
    }

    OnemklBase(std::shared_ptr<const gko::Executor> exec,
               const gko::dim<2>& size = gko::dim<2>{})
        : gko::LinOp(exec, size)
    {
        // These wrappers only work on a DPC++ executor; fail early otherwise.
        if (this->get_device_exec() == nullptr) {
            GKO_NOT_IMPLEMENTED;
        }
        this->initialize_mat_handle();
    }

    ~OnemklBase() = default;

    // NOTE(review): copy construction is deleted while copy assignment is
    // defined below -- asymmetric, presumably mirroring CusparseBase in
    // benchmark/utils/cuda_linops.cu; confirm this is intentional.
    OnemklBase(const OnemklBase& other) = delete;

    OnemklBase& operator=(const OnemklBase& other)
    {
        if (this != &other) {
            gko::LinOp::operator=(other);
            // A fresh, empty handle is created; matrix data is not copied.
            this->initialize_mat_handle();
        }
        return *this;
    }

private:
    // unique_ptr with a type-erased deleter so the handle release call can
    // be bound at construction time.
    template <typename T>
    using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
    handle_manager<oneapi::mkl::sparse::matrix_handle> mat_handle_;
};


// Benchmark wrapper around oneMKL's CSR SpMV (gemv). When `optimized` is
// true, oneapi::mkl::sparse::optimize_gemv is invoked after reading the
// matrix so subsequent applies use the tuned path.
template <bool optimized = false, typename ValueType = gko::default_precision,
          typename IndexType = gko::int32>
class OnemklCsr
    : public gko::EnableLinOp<OnemklCsr<optimized, ValueType, IndexType>,
                              OnemklBase>,
      public gko::EnableCreateMethod<
          OnemklCsr<optimized, ValueType, IndexType>>,
      public gko::ReadableFromMatrixData<ValueType, IndexType> {
    friend class gko::EnableCreateMethod<OnemklCsr>;
    friend class gko::EnablePolymorphicObject<OnemklCsr, OnemklBase>;

public:
    using Csr = gko::matrix::Csr<ValueType, IndexType>;
    using mat_data = gko::matrix_data<ValueType, IndexType>;

    // Reads the matrix into the internal Csr and hands its arrays to the
    // oneMKL matrix handle (zero-based indexing).
    void read(const mat_data& data) override
    {
        csr_->read(data);
        this->set_size(csr_->get_size());

        oneapi::mkl::sparse::set_csr_data(
            this->get_mat_handle(), static_cast<int>(this->get_size()[0]),
            static_cast<int>(this->get_size()[1]),
            oneapi::mkl::index_base::zero, csr_->get_row_ptrs(),
            csr_->get_col_idxs(), csr_->get_values());
        // Compile-time branch on the template parameter: the plain variant
        // does not even carry the optimize call.
        if constexpr (optimized) {
            // need the last argument {} to make sure that it uses USM version.
            oneapi::mkl::sparse::optimize_gemv(
                *(this->get_device_exec()->get_queue()), trans_,
                this->get_mat_handle(), {});
        }
    }

    gko::size_type get_num_stored_elements() const noexcept
    {
        return csr_->get_num_stored_elements();
    }

protected:
    // SpMV: x = 1 * A * b + 0 * x via oneMKL gemv.
    void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override
    {
        auto dense_b = gko::as<gko::matrix::Dense<ValueType>>(b);
        auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
        auto db = dense_b->get_const_values();
        auto dx = dense_x->get_values();

        // const_cast is required because the oneMKL gemv interface takes a
        // non-const pointer even for the read-only input vector.
        oneapi::mkl::sparse::gemv(
            *(this->get_device_exec()->get_queue()), trans_,
            gko::one<ValueType>(), this->get_mat_handle(),
            const_cast<ValueType*>(db), gko::zero<ValueType>(), dx);
    }

    OnemklCsr(std::shared_ptr<const gko::Executor> exec,
              const gko::dim<2>& size = gko::dim<2>{})
        : gko::EnableLinOp<OnemklCsr, OnemklBase>(exec, size),
          // Csr::create returns a prvalue; wrapping it in std::move is
          // redundant (pessimizing move), so it is initialized directly.
          csr_(Csr::create(exec, std::make_shared<typename Csr::classical>())),
          trans_(oneapi::mkl::transpose::nontrans)
    {}

private:
    oneapi::mkl::transpose trans_;
    std::shared_ptr<Csr> csr_;
};


} // namespace detail


// Register both oneMKL CSR wrappers with the benchmark's sparselib factory:
// the plain SpMV and the optimize_gemv-accelerated variant.
IMPL_CREATE_SPARSELIB_LINOP(onemkl_csr, detail::OnemklCsr<false, etype, itype>);
IMPL_CREATE_SPARSELIB_LINOP(onemkl_optimized_csr,
                            detail::OnemklCsr<true, etype, itype>);
14 changes: 13 additions & 1 deletion benchmark/utils/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ std::string available_format =
", hipsparse_csr, hipsparse_csrmm, hipsparse_coo, hipsparse_ell, "
"hipsparse_hybrid"
#endif // HAS_HIP
#ifdef HAS_DPCPP
", onemkl_csr, onemkl_optimized_csr"
#endif // HAS_DPCPP
".\n";

std::string format_description =
Expand Down Expand Up @@ -124,8 +127,13 @@ std::string format_description =
"hipsparse_coo: hipSPARSE CSR SpMV using hipsparseXhybmv\n"
" with HIPSPARSE_HYB_PARTITION_USER\n"
"hipsparse_ell: hipSPARSE CSR SpMV using hipsparseXhybmv\n"
" with HIPSPARSE_HYB_PARTITION_MAX"
" with HIPSPARSE_HYB_PARTITION_MAX\n"
#endif // HAS_HIP
#ifdef HAS_DPCPP
"onemkl_csr: oneMKL Csr SpMV\n"
"onemkl_optimized_csr: oneMKL optimized Csr SpMV using optimize_gemv after "
"reading the matrix"
#endif // HAS_DPCPP
;

std::string format_command =
Expand Down Expand Up @@ -328,6 +336,10 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
{"hipsparse_coo", read_splib_matrix_from_data<hipsparse_coo>},
{"hipsparse_ell", read_splib_matrix_from_data<hipsparse_ell>},
#endif // HAS_HIP
#ifdef HAS_DPCPP
{"onemkl_csr", read_splib_matrix_from_data<onemkl_csr>},
{"onemkl_optimized_csr", read_splib_matrix_from_data<onemkl_optimized_csr>},
#endif // HAS_DPCPP
{"hybrid", read_matrix_from_data<hybrid>},
{"hybrid0",
READ_MATRIX(hybrid, std::make_shared<hybrid::imbalance_limit>(0))},
Expand Down
Loading
0