8000 Fix handling of missing diagonal in symbolic factorizations by upsj · Pull Request #1263 · ginkgo-project/ginkgo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Fix handling of missing diagonal in symbolic factorizations #1263

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion benchmark/solver/solver_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ DEFINE_string(solvers, "cg",
"Supported values are: bicgstab, bicg, cb_gmres_keep, "
"cb_gmres_reduce1, cb_gmres_reduce2, cb_gmres_integer, "
"cb_gmres_ireduce1, cb_gmres_ireduce2, cg, cgs, fcg, gmres, idr, "
"lower_trs, upper_trs, overhead");
"lower_trs, upper_trs, symm_direct, direct, overhead");

DEFINE_uint32(
nrhs, 1,
Expand Down Expand Up @@ -251,6 +251,19 @@ std::unique_ptr<gko::LinOpFactory> generate_solver(
return gko::solver::UpperTrs<etype>::build()
.with_num_rhs(FLAGS_nrhs)
.on(exec);
} else if (description == "symm_direct") {
return gko::experimental::solver::Direct<etype, itype>::build()
.with_factorization(
gko::experimental::factorization::Lu<etype, itype>::build()
.with_symmetric_sparsity(true)
.on(exec))
.on(exec);
} else if (description == "direct") {
return gko::experimental::solver::Direct<etype, itype>::build()
.with_factorization(
gko::experimental::factorization::Lu<etype, itype>::build().on(
exec))
.on(exec);
} else if (description == "overhead") {
return add_criteria_precond_finalize<gko::Overhead<etype>>(
exec, precond, max_iters);
Expand Down
34 changes: 23 additions & 11 deletions common/cuda_hip/factorization/cholesky_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ __global__ __launch_bounds__(default_block_size) void build_postorder_cols(
auto lower_end = row_begin;
for (auto nz = row_begin; nz < row_end; nz++) {
const auto col = cols[nz];
if (col <= row) {
if (col < row) {
postorder_cols[lower_end] = inv_postorder[cols[nz]];
lower_end++;
}
Expand Down Expand Up @@ -76,22 +76,28 @@ template <int subwarp_size, typename IndexType>
__global__
__launch_bounds__(default_block_size) void cholesky_symbolic_count_kernel(
IndexType num_rows, const IndexType* row_ptrs,
const IndexType* lower_ends, const IndexType* postorder_cols,
const IndexType* postorder_parent, IndexType* row_nnz)
const IndexType* lower_ends, const IndexType* inv_postorder,
const IndexType* postorder_cols, const IndexType* postorder_parent,
IndexType* row_nnz)
{
const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
if (row >= num_rows) {
return;
}
const auto row_begin = row_ptrs[row];
// instead of relying on the input containing a diagonal, we artificially
// introduce the diagonal entry (in postorder indexing) as a sentinel after
// the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
const auto lower_end = lower_ends[row];
const auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
const auto lane = subwarp.thread_rank();
IndexType count{};
for (auto nz = row_begin + lane; nz < lower_end - 1; nz += subwarp_size) {
for (auto nz = row_begin + lane; nz < lower_end; nz += subwarp_size) {
auto node = postorder_cols[nz];
const auto next_node = postorder_cols[nz + 1];
const auto next_node =
nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
while (node < next_node) {
count++;
node = postorder_parent[node];
Expand All @@ -110,25 +116,31 @@ template <int subwarp_size, typename IndexType>
__global__
__launch_bounds__(default_block_size) void cholesky_symbolic_factorize_kernel(
IndexType num_rows, const IndexType* row_ptrs,
const IndexType* lower_ends, const IndexType* postorder_cols,
const IndexType* postorder, const IndexType* postorder_parent,
const IndexType* out_row_ptrs, IndexType* out_cols)
const IndexType* lower_ends, const IndexType* inv_postorder,
const IndexType* postorder_cols, const IndexType* postorder,
const IndexType* postorder_parent, const IndexType* out_row_ptrs,
IndexType* out_cols)
{
const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
if (row >= num_rows) {
return;
}
const auto row_begin = row_ptrs[row];
// instead of relying on the input containing a diagonal, we artificially
// introduce the diagonal entry (in postorder indexing) as a sentinel after
// the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you remind me what's inv_postoder and postorder?

Copy link
Member Author
@upsj upsj Feb 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

postorder is the sequence of row numbers such that each child comes before its parents in the elimination tree (and thus a permutation), inv_postorder is the inverse permutation. diag_postorder is the rank of the row index in the postorder sequence, i.e. it corresponds to the (potentially missing) diagonal index in the post-order column index sequence. I'll add this explanation to the code

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

postorder[i]: i-th node is postorder[i]-th in original graph
row will be considered as inv_postorder[row] in new graph
is postorder_cols like inv_postorder[]?
col[i] will be considered as postorder_col[i] in the new graph

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from build_postorder_cols, it seems to be.

const auto lower_end = lower_ends[row];
const auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
const auto lane = subwarp.thread_rank();
const auto prefix_mask = (config::lane_mask_type(1) << lane) - 1;
auto out_base = out_row_ptrs[row];
for (auto base = row_begin; base < lower_end - 1; base += subwarp_size) {
for (auto base = row_begin; base < lower_end; base += subwarp_size) {
auto nz = base + lane;
auto node = nz < lower_end - 1 ? postorder_cols[nz] : -1;
const auto next_node = nz < lower_end - 1 ? postorder_cols[nz + 1] : -1;
auto node = nz < lower_end ? postorder_cols[nz] : diag_postorder;
const auto next_node =
nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
bool pred = node < next_node;
auto mask = subwarp.ballot(pred);
while (mask) {
Expand Down
9 changes: 5 additions & 4 deletions core/factorization/symbolic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,11 @@ void symbolic_lu(const matrix::Csr<ValueType, IndexType>* mtx,
const auto row_begin_it = out_col_idxs.begin() + out_row_ptrs[row];
const auto row_end_it = out_col_idxs.end();
std::sort(row_begin_it, row_end_it);
const auto row_diag_it =
std::lower_bound(row_begin_it, row_end_it, row);
assert(row_diag_it < row_end_it);
assert(*row_diag_it == row);
auto row_diag_it = std::lower_bound(row_begin_it, row_end_it, row);
// add diagonal if it's missing
if (row_diag_it == row_end_it || *row_diag_it != row) {
row_diag_it = out_col_idxs.insert(row_diag_it, row);
}
diags[row] = std::distance(out_col_idxs.begin(), row_diag_it);
}
const auto out_nnz = static_cast<size_type>(out_col_idxs.size());
Expand Down
6 changes: 3 additions & 3 deletions cuda/factoriza 9E81 tion/cholesky_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void cholesky_symbolic_count(
ceildiv(num_rows, default_block_size / config::warp_size);
cholesky_symbolic_count_kernel<config::warp_size>
<<<num_blocks, default_block_size>>>(num_rows, row_ptrs, lower_ends,
postorder_cols,
inv_postorder, postorder_cols,
postorder_parent, row_nnz);
}
}
Expand Down Expand Up @@ -149,8 +149,8 @@ void cholesky_symbolic_factorize(
ceildiv(num_rows, default_block_size / config::warp_size);
cholesky_symbolic_factorize_kernel<config::warp_size>
<<<num_blocks, default_block_size>>>(
num_rows, row_ptrs, lower_ends, postorder_cols, postorder,
postorder_parent, out_row_ptrs, out_cols);
num_rows, row_ptrs, lower_ends, inv_postorder, postorder_cols,
postorder, postorder_parent, out_row_ptrs, out_cols);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
22 changes: 17 additions & 5 deletions dpcpp/factorization/cholesky_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void cholesky_symbolic_count(
auto lower_end = row_begin;
for (auto nz = row_begin; nz < row_end; nz++) {
const auto col = cols[nz];
if (col <= row) {
if (col < row) {
postorder_cols[lower_end] = inv_postorder[cols[nz]];
lower_end++;
}
Expand All @@ -101,11 +101,17 @@ void cholesky_symbolic_count(
cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) {
const auto row = idx_id[0];
const auto row_begin = row_ptrs[row];
// instead of relying on the input containing a diagonal, we
// artificially introduce the diagonal entry (in postorder indexing)
// as a sentinel after the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
const auto lower_end = lower_ends[row];
IndexType count{};
for (auto nz = row_begin; nz < lower_end - 1; ++nz) {
for (auto nz = row_begin; nz < lower_end; ++nz) {
auto node = postorder_cols[nz];
const auto next_node = postorder_cols[nz + 1];
const auto next_node = nz < lower_end - 1
? postorder_cols[nz + 1]
: diag_postorder;
while (node < next_node) {
count++;
node = postorder_parent[node];
Expand Down Expand Up @@ -142,11 +148,17 @@ void cholesky_symbolic_factorize(
cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) {
const auto row = idx_id[0];
const auto row_begin = row_ptrs[row];
// instead of relying on the input containing a diagonal, we
// artificially introduce the diagonal entry (in postorder indexing)
// as a sentinel after the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
const auto lower_end = lower_ends[row];
auto out_nz = out_row_ptrs[row];
for (auto nz = row_begin; nz < lower_end - 1; ++nz) {
for (auto nz = row_begin; nz < lower_end; ++nz) {
auto node = postorder_cols[nz];
const auto next_node = postorder_cols[nz + 1];
const auto next_node = nz < lower_end - 1
? postorder_cols[nz + 1]
: diag_postorder;
while (node < next_node) {
out_cols[out_nz] = postorder[node];
out_nz++;
Expand Down
6 changes: 3 additions & 3 deletions hip/factorization/cholesky_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void cholesky_symbolic_count(
ceildiv(num_rows, default_block_size / config::warp_size);
cholesky_symbolic_count_kernel<config::warp_size>
<<<num_blocks, default_block_size>>>(num_rows, row_ptrs, lower_ends,
postorder_cols,
inv_postorder, postorder_cols,
postorder_parent, row_nnz);
}
}
Expand Down Expand Up @@ -149,8 +149,8 @@ void cholesky_symbolic_factorize(
ceildiv(num_rows, default_block_size / config::warp_size);
cholesky_symbolic_factorize_kernel<config::warp_size>
<<<num_blocks, default_block_size>>>(
num_rows, row_ptrs, lower_ends, postorder_cols, postorder,
postorder_parent, out_row_ptrs, out_cols);
num_rows, row_ptrs, lower_ends, inv_postorder, postorder_cols,
postorder, postorder_parent, out_row_ptrs, out_cols);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
24 changes: 16 additions & 8 deletions omp/factorization/cholesky_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,26 @@ void cholesky_symbolic_count(
for (IndexType row = 0; row < num_rows; row++) {
const auto row_begin = row_ptrs[row];
const auto row_end = row_ptrs[row + 1];
// transform lower triangular entries into sorted postorder
// instead of relying on the input containing a diagonal, we
// artificially introduce the diagonal entry (in postorder indexing) as
// a sentinel after the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
// transform strictly lower triangular entries into sorted postorder
auto lower_end = row_begin;
for (auto nz = row_begin; nz < row_end; nz++) {
const auto col = cols[nz];
if (col <= row) {
if (col < row) {
postorder_cols[lower_end] = inv_postorder[col];
lower_end++;
}
}
std::sort(postorder_cols + row_begin, postorder_cols + lower_end);
// the subtree root should be the last entry as a sentinel
GKO_ASSERT(postorder_cols[lower_end - 1] == inv_postorder[row]);
// Now move from each node to its LCA with other nodes to cut off a path
IndexType count{};
for (auto nz = row_begin; nz < lower_end - 1; nz++) {
for (auto nz = row_begin; nz < lower_end; nz++) {
auto node = postorder_cols[nz];
const auto next_node = postorder_cols[nz + 1];
const auto next_node =
nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
// move upwards until we find the LCA with next_node
while (node < next_node) {
count++;
Expand Down Expand Up @@ -128,12 +131,17 @@ void cholesky_symbolic_factorize(
for (IndexType row = 0; row < num_rows; row++) {
const auto row_begin = row_ptrs[row];
const auto row_end = row_ptrs[row + 1];
// instead of relying on the input containing a diagonal, we
// artificially introduce the diagonal entry (in postorder indexing) as
// a sentinel after the last lower triangular entry.
const auto diag_postorder = inv_postorder[row];
const auto lower_end = lower_ends[row];
// Now move from each node to its LCA with other nodes to cut off a path
auto out_nz = out_row_ptrs[row];
for (auto nz = row_begin; nz < lower_end - 1; nz++) {
for (auto nz = row_begin; nz < lower_end; nz++) {
auto node = postorder_cols[nz];
const auto next_node = postorder_cols[nz + 1];
const auto next_node =
nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
// move upwards until we find the LCA with next_node
while (node < next_node) {
out_cols[out_nz] = postorder[node];
Expand Down
72 changes: 70 additions & 2 deletions reference/test/factorization/cholesky_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ TYPED_TEST(Cholesky, KernelSymbolicCountSeparable)
{0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 1},
{0, 0, 0, 0, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 1, 0, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
{0, 0, 0, 0, 1, 0, 1, 0, 1, 1}},
Expand All @@ -189,7 +189,7 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeSeparable)
{0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 1},
{0, 0, 0, 0, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 1, 0, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
{0, 0, 0, 0, 1, 0, 1, 0, 1, 1}},
Expand Down Expand Up @@ -219,6 +219,74 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeSeparable)
}


TYPED_TEST(Cholesky, KernelSymbolicCountMissingDiagonal)
{
using matrix_type = typename TestFixture::matrix_type;
using index_type = typename TestFixture::index_type;
auto mtx = gko::initialize<typename TestFixture::matrix_type>(
{{1, 0, 1, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 0, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 0, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 1, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 1, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
{0, 0, 0, 0, 0, 0, 1, 0, 1, 0}},
this->ref);
std::unique_ptr<gko::factorization::elimination_forest<index_type>> forest;
gko::factorization::compute_elim_forest(mtx.get(), forest);
gko::array<index_type> row_nnz{this->ref, 10};

gko::kernels::reference::cholesky::cholesky_symbolic_count(
this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp);

GKO_ASSERT_ARRAY_EQ(row_nnz, I<index_type>({1, 1, 3, 2, 2, 2, 2, 2, 1, 4}));
}


TYPED_TEST(Cholesky, KernelSymbolicFactorizeMissingDiagonal)
{
using matrix_type = typename TestFixture::matrix_type;
using index_type = typename TestFixture::index_type;
auto mtx = gko::initialize<typename TestFixture::matrix_type>(
{{1, 0, 1, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 0, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 0, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 1, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 1, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
{0, 0, 0, 0, 0, 0, 1, 0, 1, 0}},
this->ref);
std::unique_ptr<gko::factorization::elimination_forest<index_type>> forest;
gko::factorization::compute_elim_forest(mtx.get(), forest);
auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 20);
gko::kernels::reference::cholesky::cholesky_symbolic_count(
this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp);
gko::kernels::reference::components::prefix_sum(
this->ref, l_factor->get_row_ptrs(), 11);

gko::kernels::reference::cholesky::cholesky_symbolic_factorize(
this->ref, mtx.get(), *forest, l_factor.get(), this->tmp);

GKO_ASSERT_MTX_EQ_SPARSITY(l_factor,
l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{1., 1., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 1., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 1., 1., 0., 0., 0., 0.},
{0., 0., 0., 0., 0., 1., 1., 0., 0., 0.},
{0., 0., 0., 0., 0., 0., 1., 1., 0., 0.},
{0., 0., 0., 0., 0., 0., 0., 0., 1., 0.},
{0., 0., 0., 0., 0., 0., 1., 1., 1., 1.}}));
}


TYPED_TEST(Cholesky, KernelSymbolicCountAni1)
{
using matrix_type = typename TestFixture::matrix_type;
Expand Down
26 changes: 26 additions & 0 deletions reference/test/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,32 @@ TYPED_TEST(Lu, SymbolicLUWorks)
}


TYPED_TEST(Lu, SymbolicLUWorksWithMissingDiagonal)
{
using matrix_type = typename TestFixture::matrix_type;
auto mtx = gko::initialize<matrix_type>({{1, 1, 1, 0, 0, 0},
{1, 0, 1, 0, 0, 0},
{1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 1, 0},
{0, 0, 0, 1, 0, 1},
{0, 0, 0, 0, 1, 0}},
this->ref);
auto expected = gko::initialize<matrix_type>({{1, 1, 1, 0, 0, 0},
{1, 1, 1, 0, 0, 0},
{1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 1, 0},
{0, 0, 0, 1, 1, 1},
{0, 0, 0, 0, 1, 1}},
this->ref);


std::unique_ptr<matrix_type> lu;
gko::factorization::symbolic_lu(mtx.get(), lu);

GKO_ASSERT_MTX_EQ_SPARSITY(lu, expected);
}


TYPED_TEST(Lu, KernelInitializeWorks)
{
using value_type = typename TestFixture::value_type;
Expand Down
Loading
0