Ginkgo Generated from branch based on master. Ginkgo version 1.8.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
The par-ilu-convergence program

The ParILU convergence example.

This example depends on simple-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

About the example

This example can be used to inspect the convergence behavior of parallel incomplete factorizations. *

The commented program

template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
The Error class is used to report exceptional behaviour in library functions.
Definition exception.hpp:58
virtual const char * what() const noexcept override
Returns a human-readable string with a more detailed description of the error.
Definition exception.hpp:75
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:22
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
constexpr auto squared_norm(const T &x) -> decltype(real(conj(x) *x))
Returns the squared norm of the object.
Definition math.hpp:1057
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:127
std::vector< nonzero_type > nonzeros
A vector of tuples storing the non-zeros of the matrix.
Definition matrix_data.hpp:454
void sort_row_major()
Sorts the nonzero vector so the values follow row-major order.
Definition matrix_data.hpp:459

print usage message

if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}

generate executor based on first argument

auto exec = try_generate([&] { return executors.at(argv[1])(); });

set matrix and preconditioner name with default values

std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);

load matrix file into Csr format

auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto minus_one =
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
ParIC is an incomplete Cholesky factorization which is computed in parallel.
Definition par_ic.hpp:70
ParICT is an incomplete threshold-based Cholesky factorization which is computed in parallel.
Definition par_ict.hpp:70
ParILU is an incomplete LU factorization which is computed in parallel.
Definition par_ilu.hpp:71
ParILUT is an incomplete threshold-based LU factorization which is computed in parallel.
Definition par_ilut.hpp:73
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1540
@ factory
LinOpFactory events.
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:775
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:86
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition utils_helper.hpp:175
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:160
std::decay_t< T > * as(U *obj)
Performs polymorphic type conversion.
Definition utils_helper.hpp:309
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:226

Results

This is the expected output:

Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<max-iterations>] [<num-repetitions>] [fill-in-limit]

When specifying an executor:

Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;

Comments about programming and debugging

The plain program

#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <ginkgo/ginkgo.hpp>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
auto exec = try_generate([&] { return executors.at(argv[1])(); });
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto minus_one =
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1345