8000 add oneMKL csr and optimized csr to benchmark by yhmtsai · Pull Request #1057 · ginkgo-project/ginkgo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

add oneMKL csr and optimized csr to benchmark #1057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ function(ginkgo_benchmark_hipsparse_linops type def)
target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES})
endfunction()

# Builds the oneMKL sparse-linop wrapper library for one benchmark precision.
# type: single-letter precision suffix (d/s/z/c) used in the target name;
# def:  precision macro forwarded to consumers of the library.
function(ginkgo_benchmark_onemkl_linops type def)
    add_library(onemkl_linops_${type} utils/dpcpp_linops.dp.cpp)
    # make the dependency public to catch issues
    target_compile_definitions(onemkl_linops_${type} PUBLIC ${def})
    # oneMKL DPC++ interfaces require C++17; PRIVATE since only this TU needs it
    target_compile_features(onemkl_linops_${type} PRIVATE cxx_std_17)
    target_link_libraries(onemkl_linops_${type} PRIVATE Ginkgo::ginkgo MKL::MKL_DPCPP)
endfunction()


# Generates an executable for one precision. Each executable will be linked to
# `ginkgo`, `gflags` and `rapidjson`.
Expand Down Expand Up @@ -78,6 +86,10 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty
target_compile_definitions("${name}" PRIVATE HAS_HIP=1)
target_link_libraries("${name}" hipsparse_linops_${type})
endif()
# When the DPC++ backend is enabled, expose HAS_DPCPP to the benchmark
# sources and link the matching oneMKL wrapper library for this precision.
if (GINKGO_BUILD_DPCPP)
    target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1)
    target_link_libraries("${name}" onemkl_linops_${type})
endif()
endif()
endfunction(ginkgo_add_single_benchmark_executable)

Expand Down Expand Up @@ -127,6 +139,12 @@ if (GINKGO_BUILD_HIP)
target_link_libraries(hip_timer ginkgo)
endif()

# Build one oneMKL wrapper library per benchmarked precision:
# d = double, s = single, z = double complex, c = single complex.
if (GINKGO_BUILD_DPCPP)
    ginkgo_benchmark_onemkl_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
    ginkgo_benchmark_onemkl_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION)
    ginkgo_benchmark_onemkl_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)
    ginkgo_benchmark_onemkl_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)
endif()

add_subdirectory(blas)
add_subdirectory(conversions)
Expand Down
24 changes: 11 additions & 13 deletions benchmark/utils/cuda_linops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,16 @@ public:
// uses gko::array to allocate buffer.
std::shared_ptr<const gko::CudaExecutor> get_gpu_exec() const
{
return gpu_exec_;
return std::dynamic_pointer_cast<const gko::CudaExecutor>(
this->get_executor());
}

protected:
CusparseBase(std::shared_ptr<const gko::Executor> exec,
const gko::dim<2>& size = gko::dim<2>{})
: gko::LinOp(exec, size)
{
gpu_exec_ = std::dynamic_pointer_cast<const gko::CudaExecutor>(exec);
if (gpu_exec_ == nullptr) {
if (this->get_gpu_exec() == nullptr) {
GKO_NOT_IMPLEMENTED;
}
this->initialize_descr();
Expand All @@ -95,15 +95,14 @@ protected:
{
if (this != &other) {
gko::LinOp::operator=(other);
this->gpu_exec_ = other.gpu_exec_;
this->initialize_descr();
}
return *this;
}

void initialize_descr()
{
const auto id = this->gpu_exec_->get_device_id();
const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
this->descr_ = handle_manager<cusparseMatDescr>(
gko::kernels::cuda::cusparse::create_mat_descr(),
Expand All @@ -114,7 +113,6 @@ protected:
}

private:
std::shared_ptr<const gko::CudaExecutor> gpu_exec_;
template <typename T>
using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
handle_manager<cusparseMatDescr> descr_;
Expand Down Expand Up @@ -152,7 +150,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -227,7 +225,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -303,7 +301,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -383,7 +381,7 @@ public:
void read(const mat_data& data) override
{
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
}

gko::size_type get_num_stored_elements() const noexcept
Expand Down Expand Up @@ -492,7 +490,7 @@ public:
auto t_csr = csr::create(this->get_executor(),
std::make_shared<typename csr::classical>());
t_csr->read(data);
this->set_size(gko::dim<2>{t_csr->get_size()});
this->set_size(t_csr->get_size());

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
Expand Down Expand Up @@ -638,7 +636,7 @@ public:
{
using gko::kernels::cuda::as_culibs_type;
csr_->read(data);
this->set_size(gko::dim<2>{csr_->get_size()});
this->set_size(csr_->get_size());
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseCreateCsr(&mat_, csr_->get_size()[0], csr_->get_size()[1],
csr_->get_num_stored_elements(),
Expand Down Expand Up @@ -731,7 +729,7 @@ public:
{
using gko::kernels::cuda::as_culibs_type;
coo_->read(data);
this->set_size(gko::dim<2>{coo_->get_size()});
this->set_size(coo_->get_size());
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseCreateCoo(&mat_, coo_->get_size()[0], coo_->get_size()[1],
coo_->get_num_stored_elements(),
Expand Down
191 changes: 191 additions & 0 deletions benchmark/utils/dpcpp_linops.dp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <ginkgo/ginkgo.hpp>


#include <memory>


#include <oneapi/mkl.hpp>


#include "benchmark/utils/sparselib_linops.hpp"
#include "benchmark/utils/types.hpp"


// Empty tag types used to select the oneMKL SpMV implementation in the
// IMPL_CREATE_SPARSELIB_LINOP registrations at the bottom of this file.
class onemkl_csr {};
class onemkl_optimized_csr {};


namespace detail {


/// Creates and initializes a fresh oneMKL sparse matrix handle.
/// The caller takes ownership and must release it with
/// oneapi::mkl::sparse::release_matrix_handle.
inline oneapi::mkl::sparse::matrix_handle_t create_mat_handle()
{
    oneapi::mkl::sparse::matrix_handle_t handle;
    oneapi::mkl::sparse::init_matrix_handle(&handle);
    return handle;
}


// Common base for the oneMKL benchmark wrappers: ties the lifetime of a
// oneMKL sparse matrix handle to a gko::LinOp and provides access to the
// DPC++ executor.
class OnemklBase : public gko::LinOp {
public:
    // Raw oneMKL matrix handle owned by this object (do not release it).
    oneapi::mkl::sparse::matrix_handle_t get_mat_handle() const
    {
        return this->mat_handle_.get();
    }

    // Executor downcast to DpcppExecutor; nullptr if the stored executor
    // is not a DPC++ executor.
    std::shared_ptr<const gko::DpcppExecutor> get_device_exec() const
    {
        return std::dynamic_pointer_cast<const gko::DpcppExecutor>(
            this->get_executor());
    }

protected:
    // The advanced (alpha/beta) apply is not supported by these wrappers.
    void apply_impl(const gko::LinOp*, const gko::LinOp*, const gko::LinOp*,
                    gko::LinOp*) const override
    {
        GKO_NOT_IMPLEMENTED;
    }

    // (Re)creates the matrix handle; the deleter releases it through oneMKL
    // when the wrapper is destroyed or re-initialized (RAII).
    void initialize_mat_handle()
    {
        mat_handle_ = handle_manager<oneapi::mkl::sparse::matrix_handle>(
            create_mat_handle(),
            [](oneapi::mkl::sparse::matrix_handle_t mat_handle) {
                oneapi::mkl::sparse::release_matrix_handle(&mat_handle);
            });
    }

    OnemklBase(std::shared_ptr<const gko::Executor> exec,
               const gko::dim<2>& size = gko::dim<2>{})
        : gko::LinOp(exec, size)
    {
        // These wrappers only work on a DPC++ executor; fail early otherwise.
        if (this->get_device_exec() == nullptr) {
            GKO_NOT_IMPLEMENTED;
        }
        this->initialize_mat_handle();
    }

    ~OnemklBase() = default;

    // NOTE(review): copy construction is deleted while copy assignment is
    // defined below -- asymmetric, presumably mirroring CusparseBase in
    // benchmark/utils/cuda_linops.cu; confirm this is intentional.
    OnemklBase(const OnemklBase& other) = delete;

    OnemklBase& operator=(const OnemklBase& other)
    {
        if (this != &other) {
            gko::LinOp::operator=(other);
            // A fresh, empty handle is created; matrix data is not copied.
            this->initialize_mat_handle();
        }
        return *this;
    }

private:
    // unique_ptr with a type-erased deleter so the handle release call can
    // be bound at construction time.
    template <typename T>
    using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
    handle_manager<oneapi::mkl::sparse::matrix_handle> mat_handle_;
};


// Benchmark wrapper around oneMKL's CSR SpMV (gemv). When `optimized` is
// true, oneapi::mkl::sparse::optimize_gemv is invoked after reading the
// matrix so subsequent applies use the tuned path.
template <bool optimized = false, typename ValueType = gko::default_precision,
          typename IndexType = gko::int32>
class OnemklCsr
    : public gko::EnableLinOp<OnemklCsr<optimized, ValueType, IndexType>,
                              OnemklBase>,
      public gko::EnableCreateMethod<
          OnemklCsr<optimized, ValueType, IndexType>>,
      public gko::ReadableFromMatrixData<ValueType, IndexType> {
    friend class gko::EnableCreateMethod<OnemklCsr>;
    friend class gko::EnablePolymorphicObject<OnemklCsr, OnemklBase>;

public:
    using Csr = gko::matrix::Csr<ValueType, IndexType>;
    using mat_data = gko::matrix_data<ValueType, IndexType>;

    // Reads the matrix into the internal Csr and hands its arrays to the
    // oneMKL matrix handle (zero-based indexing).
    void read(const mat_data& data) override
    {
        csr_->read(data);
        this->set_size(csr_->get_size());

        oneapi::mkl::sparse::set_csr_data(
            this->get_mat_handle(), static_cast<int>(this->get_size()[0]),
            static_cast<int>(this->get_size()[1]),
            oneapi::mkl::index_base::zero, csr_->get_row_ptrs(),
            csr_->get_col_idxs(), csr_->get_values());
        // Compile-time branch on the template parameter: the plain variant
        // does not even carry the optimize call.
        if constexpr (optimized) {
            // need the last argument {} to make sure that it uses USM version.
            oneapi::mkl::sparse::optimize_gemv(
                *(this->get_device_exec()->get_queue()), trans_,
                this->get_mat_handle(), {});
        }
    }

    gko::size_type get_num_stored_elements() const noexcept
    {
        return csr_->get_num_stored_elements();
    }

protected:
    // SpMV: x = 1 * A * b + 0 * x via oneMKL gemv.
    void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override
    {
        auto dense_b = gko::as<gko::matrix::Dense<ValueType>>(b);
        auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
        auto db = dense_b->get_const_values();
        auto dx = dense_x->get_values();

        // const_cast is required because the oneMKL gemv interface takes a
        // non-const pointer even for the read-only input vector.
        oneapi::mkl::sparse::gemv(
            *(this->get_device_exec()->get_queue()), trans_,
            gko::one<ValueType>(), this->get_mat_handle(),
            const_cast<ValueType*>(db), gko::zero<ValueType>(), dx);
    }

    OnemklCsr(std::shared_ptr<const gko::Executor> exec,
              const gko::dim<2>& size = gko::dim<2>{})
        : gko::EnableLinOp<OnemklCsr, OnemklBase>(exec, size),
          // Csr::create returns a prvalue; wrapping it in std::move is
          // redundant (pessimizing move), so it is initialized directly.
          csr_(Csr::create(exec, std::make_shared<typename Csr::classical>())),
          trans_(oneapi::mkl::transpose::nontrans)
    {}

private:
    oneapi::mkl::transpose trans_;
    std::shared_ptr<Csr> csr_;
};


} // namespace detail


// Register both oneMKL CSR wrappers with the benchmark's sparselib factory:
// the plain SpMV and the optimize_gemv-accelerated variant.
IMPL_CREATE_SPARSELIB_LINOP(onemkl_csr, detail::OnemklCsr<false, etype, itype>);
IMPL_CREATE_SPARSELIB_LINOP(onemkl_optimized_csr,
                            detail::OnemklCsr<true, etype, itype>);
14 changes: 13 additions & 1 deletion benchmark/utils/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ std::string available_format =
", hipsparse_csr, hipsparse_csrmm, hipsparse_coo, hipsparse_ell, "
"hipsparse_hybrid"
#endif // HAS_HIP
#ifdef HAS_DPCPP
", onemkl_csr, onemkl_optimized_csr"
#endif // HAS_DPCPP
".\n";

std::string format_description =
Expand Down Expand Up @@ -124,8 +127,13 @@ std::string format_description =
"hipsparse_coo: hipSPARSE CSR SpMV using hipsparseXhybmv\n"
" with HIPSPARSE_HYB_PARTITION_USER\n"
"hipsparse_ell: hipSPARSE CSR SpMV using hipsparseXhybmv\n"
" with HIPSPARSE_HYB_PARTITION_MAX"
" with HIPSPARSE_HYB_PARTITION_MAX\n"
#endif // HAS_HIP
#ifdef HAS_DPCPP
"onemkl_csr: oneMKL Csr SpMV\n"
"onemkl_optimized_csr: oneMKL optimized Csr SpMV using optimize_gemv after "
"reading the matrix"
#endif // HAS_DPCPP
;

std::string format_command =
Expand Down Expand Up @@ -328,6 +336,10 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
{"hipsparse_coo", read_splib_matrix_from_data<hipsparse_coo>},
{"hipsparse_ell", read_splib_matrix_from_data<hipsparse_ell>},
#endif // HAS_HIP
#ifdef HAS_DPCPP
{"onemkl_csr", read_splib_matrix_from_data<onemkl_csr>},
{"onemkl_optimized_csr", read_splib_matrix_from_data<onemkl_optimized_csr>},
#endif // HAS_DPCPP
{"hybrid", read_matrix_from_data<hybrid>},
{"hybrid0",
READ_MATRIX(hybrid, std::make_shared<hybrid::imbalance_limit>(0))},
Expand Down
Loading
0