Ginkgo Generated from branch based on master. Ginkgo version 1.8.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
// Forward declarations of the matrix formats that Csr befriends or can be
// converted to, so that this header does not need their full definitions.
template <typename ValueType>
class Dense;

template <typename ValueType>
class Diagonal;

template <typename ValueType, typename IndexType>
class Coo;

template <typename ValueType, typename IndexType>
class Ell;

template <typename ValueType, typename IndexType>
class Hybrid;

template <typename ValueType, typename IndexType>
class Sellp;

template <typename ValueType, typename IndexType>
class SparsityCsr;

template <typename ValueType, typename IndexType>
class Csr;

template <typename ValueType, typename IndexType>
class Fbcsr;

// Csr names CsrBuilder as a friend, so the template has to be declared
// before the class definition.
template <typename ValueType, typename IndexType>
class CsrBuilder;
50
51
52namespace detail {
53
54
55template <typename ValueType = default_precision, typename IndexType = int32>
56void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
57
58
59} // namespace detail
60
61
100template <typename ValueType = default_precision, typename IndexType = int32>
101class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
102 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
103 public ConvertibleTo<Dense<ValueType>>,
104 public ConvertibleTo<Coo<ValueType, IndexType>>,
105 public ConvertibleTo<Ell<ValueType, IndexType>>,
106 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
107 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
108 public ConvertibleTo<Sellp<ValueType, IndexType>>,
109 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
110 public DiagonalExtractable<ValueType>,
111 public ReadableFromMatrixData<ValueType, IndexType>,
112 public WritableToMatrixData<ValueType, IndexType>,
113 public Transposable,
114 public Permutable<IndexType>,
116 remove_complex<Csr<ValueType, IndexType>>>,
117 public ScaledIdentityAddable {
118 friend class EnablePolymorphicObject<Csr, LinOp>;
119 friend class Coo<ValueType, IndexType>;
120 friend class Dense<ValueType>;
121 friend class Diagonal<ValueType>;
122 friend class Ell<ValueType, IndexType>;
123 friend class Hybrid<ValueType, IndexType>;
124 friend class Sellp<ValueType, IndexType>;
125 friend class SparsityCsr<ValueType, IndexType>;
126 friend class Fbcsr<ValueType, IndexType>;
127 friend class CsrBuilder<ValueType, IndexType>;
128 friend class Csr<to_complex<ValueType>, IndexType>;
129
130public:
131 using EnableLinOp<Csr>::convert_to;
132 using EnableLinOp<Csr>::move_to;
133 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
134 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
135 using ConvertibleTo<Dense<ValueType>>::convert_to;
136 using ConvertibleTo<Dense<ValueType>>::move_to;
137 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
139 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
149 using ReadableFromMatrixData<ValueType, IndexType>::read;
150
151 using value_type = ValueType;
152 using index_type = IndexType;
153 using transposed_type = Csr<ValueType, IndexType>;
154 using mat_data = matrix_data<ValueType, IndexType>;
155 using device_mat_data = device_matrix_data<ValueType, IndexType>;
156 using absolute_type = remove_complex<Csr>;
157
158 class automatical;
159
167 friend class automatical;
168
169 public:
175 strategy_type(std::string name) : name_(name) {}
176
177 virtual ~strategy_type() = default;
178
184 std::string get_name() { return name_; }
185
192 virtual void process(const array<index_type>& mtx_row_ptrs,
193 array<index_type>* mtx_srow) = 0;
194
202 virtual int64_t clac_size(const int64_t nnz) = 0;
203
208 virtual std::shared_ptr<strategy_type> copy() = 0;
209
210 protected:
211 void set_name(std::string name) { name_ = name; }
212
213 private:
214 std::string name_;
215 };
216
223 class classical : public strategy_type {
224 public:
228 classical() : strategy_type("classical"), max_length_per_row_(0) {}
229
230 void process(const array<index_type>& mtx_row_ptrs,
231 array<index_type>* mtx_srow) override
232 {
233 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
234 array<index_type> row_ptrs_host(host_mtx_exec);
235 const bool is_mtx_on_host{host_mtx_exec ==
236 mtx_row_ptrs.get_executor()};
237 const index_type* row_ptrs{};
238 if (is_mtx_on_host) {
239 row_ptrs = mtx_row_ptrs.get_const_data();
240 } else {
241 row_ptrs_host = mtx_row_ptrs;
242 row_ptrs = row_ptrs_host.get_const_data();
243 }
244 auto num_rows = mtx_row_ptrs.get_size() - 1;
245 max_length_per_row_ = 0;
246 for (size_type i = 0; i < num_rows; i++) {
247 max_length_per_row_ = std::max(max_length_per_row_,
248 row_ptrs[i + 1] - row_ptrs[i]);
249 }
250 }
251
252 int64_t clac_size(const int64_t nnz) override { return 0; }
253
254 index_type get_max_length_per_row() const noexcept
255 {
256 return max_length_per_row_;
257 }
258
259 std::shared_ptr<strategy_type> copy() override
260 {
261 return std::make_shared<classical>();
262 }
263
264 private:
265 index_type max_length_per_row_;
266 };
267
273 class merge_path : public strategy_type {
274 public:
278 merge_path() : strategy_type("merge_path") {}
279
280 void process(const array<index_type>& mtx_row_ptrs,
281 array<index_type>* mtx_srow) override
282 {}
283
284 int64_t clac_size(const int64_t nnz) override { return 0; }
285
286 std::shared_ptr<strategy_type> copy() override
287 {
288 return std::make_shared<merge_path>();
289 }
290 };
291
298 class cusparse : public strategy_type {
299 public:
303 cusparse() : strategy_type("cusparse") {}
304
305 void process(const array<index_type>& mtx_row_ptrs,
306 array<index_type>* mtx_srow) override
307 {}
308
309 int64_t clac_size(const int64_t nnz) override { return 0; }
310
311 std::shared_ptr<strategy_type> copy() override
312 {
313 return std::make_shared<cusparse>();
314 }
315 };
316
322 class sparselib : public strategy_type {
323 public:
327 sparselib() : strategy_type("sparselib") {}
328
329 void process(const array<index_type>& mtx_row_ptrs,
330 array<index_type>* mtx_srow) override
331 {}
332
333 int64_t clac_size(const int64_t nnz) override { return 0; }
334
335 std::shared_ptr<strategy_type> copy() override
336 {
337 return std::make_shared<sparselib>();
338 }
339 };
340
345 public:
352 [[deprecated]] load_balance()
353 : load_balance(std::move(
355 {}
356
362 load_balance(std::shared_ptr<const CudaExecutor> exec)
363 : load_balance(exec->get_num_warps(), exec->get_warp_size())
364 {}
365
371 load_balance(std::shared_ptr<const HipExecutor> exec)
372 : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
373 {}
374
382 load_balance(std::shared_ptr<const DpcppExecutor> exec)
383 : load_balance(exec->get_num_subgroups(), 32, false, "intel")
384 {}
385
397 load_balance(int64_t nwarps, int warp_size = 32,
398 bool cuda_strategy = true,
399 std::string strategy_name = "none")
400 : strategy_type("load_balance"),
401 nwarps_(nwarps),
402 warp_size_(warp_size),
403 cuda_strategy_(cuda_strategy),
404 strategy_name_(strategy_name)
405 {}
406
407 void process(const array<index_type>& mtx_row_ptrs,
408 array<index_type>* mtx_srow) override
409 {
410 auto nwarps = mtx_srow->get_size();
411
412 if (nwarps > 0) {
413 auto host_srow_exec = mtx_srow->get_executor()->get_master();
414 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
415 const bool is_srow_on_host{host_srow_exec ==
416 mtx_srow->get_executor()};
417 const bool is_mtx_on_host{host_mtx_exec ==
418 mtx_row_ptrs.get_executor()};
419 array<index_type> row_ptrs_host(host_mtx_exec);
420 array<index_type> srow_host(host_srow_exec);
421 const index_type* row_ptrs{};
422 index_type* srow{};
423 if (is_srow_on_host) {
424 srow = mtx_srow->get_data();
425 } else {
426 srow_host = *mtx_srow;
427 srow = srow_host.get_data();
428 }
429 if (is_mtx_on_host) {
430 row_ptrs = mtx_row_ptrs.get_const_data();
431 } else {
432 row_ptrs_host = mtx_row_ptrs;
433 row_ptrs = row_ptrs_host.get_const_data();
434 }
435 for (size_type i = 0; i < nwarps; i++) {
436 srow[i] = 0;
437 }
438 const auto num_rows = mtx_row_ptrs.get_size() - 1;
439 const auto num_elems = row_ptrs[num_rows];
440 const auto bucket_divider =
441 num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
442 for (size_type i = 0; i < num_rows; i++) {
443 auto bucket =
444 ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
445 bucket_divider);
446 if (bucket < nwarps) {
447 srow[bucket]++;
448 }
449 }
450 // find starting row for thread i
451 for (size_type i = 1; i < nwarps; i++) {
452 srow[i] += srow[i - 1];
453 }
454 if (!is_srow_on_host) {
455 *mtx_srow = srow_host;
456 }
457 }
458 }
459
460 int64_t clac_size(const int64_t nnz) override
461 {
462 if (warp_size_ > 0) {
463 int multiple = 8;
464 if (nnz >= static_cast<int64_t>(2e8)) {
465 multiple = 2048;
466 } else if (nnz >= static_cast<int64_t>(2e7)) {
467 multiple = 512;
468 } else if (nnz >= static_cast<int64_t>(2e6)) {
469 multiple = 128;
470 } else if (nnz >= static_cast<int64_t>(2e5)) {
471 multiple = 32;
472 }
473 if (strategy_name_ == "intel") {
474 multiple = 8;
475 if (nnz >= static_cast<int64_t>(2e8)) {
476 multiple = 256;
477 } else if (nnz >= static_cast<int64_t>(2e7)) {
478 multiple = 32;
479 }
480 }
481#if GINKGO_HIP_PLATFORM_HCC
482 if (!cuda_strategy_) {
483 multiple = 8;
484 if (nnz >= static_cast<int64_t>(1e7)) {
485 multiple = 64;
486 } else if (nnz >= static_cast<int64_t>(1e6)) {
487 multiple = 16;
488 }
489 }
490#endif // GINKGO_HIP_PLATFORM_HCC
491
492 auto nwarps = nwarps_ * multiple;
493 return min(ceildiv(nnz, warp_size_), nwarps);
494 } else {
495 return 0;
496 }
497 }
498
499 std::shared_ptr<strategy_type> copy() override
500 {
501 return std::make_shared<load_balance>(
502 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
503 }
504
505 private:
506 int64_t nwarps_;
507 int warp_size_;
508 bool cuda_strategy_;
509 std::string strategy_name_;
510 };
511
512 class automatical : public strategy_type {
513 public:
514 /* Use imbalance strategy when the maximum number of nonzero per row is
515 * more than 1024 on NVIDIA hardware */
516 const index_type nvidia_row_len_limit = 1024;
517 /* Use imbalance strategy when the matrix has more more than 1e6 on
518 * NVIDIA hardware */
519 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
520 /* Use imbalance strategy when the maximum number of nonzero per row is
521 * more than 768 on AMD hardware */
522 const index_type amd_row_len_limit = 768;
523 /* Use imbalance strategy when the matrix has more more than 1e8 on AMD
524 * hardware */
525 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
526 /* Use imbalance strategy when the maximum number of nonzero per row is
527 * more than 25600 on Intel hardware */
528 const index_type intel_row_len_limit = 25600;
529 /* Use imbalance strategy when the matrix has more more than 3e8 on
530 * Intel hardware */
531 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
532
533 public:
540 [[deprecated]] automatical()
541 : automatical(std::move(
543 {}
544
550 automatical(std::shared_ptr<const CudaExecutor> exec)
551 : automatical(exec->get_num_warps(), exec->get_warp_size())
552 {}
553
559 automatical(std::shared_ptr<const HipExecutor> exec)
560 : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
561 {}
562
570 automatical(std::shared_ptr<const DpcppExecutor> exec)
571 : automatical(exec->get_num_subgroups(), 32, false, "intel")
572 {}
573
585 automatical(int64_t nwarps, int warp_size = 32,
586 bool cuda_strategy = true,
587 std::string strategy_name = "none")
588 : strategy_type("automatical"),
589 nwarps_(nwarps),
590 warp_size_(warp_size),
591 cuda_strategy_(cuda_strategy),
592 strategy_name_(strategy_name),
593 max_length_per_row_(0)
594 {}
595
596 void process(const array<index_type>& mtx_row_ptrs,
597 array<index_type>* mtx_srow) override
598 {
599 // if the number of stored elements is larger than <nnz_limit> or
600 // the maximum number of stored elements per row is larger than
601 // <row_len_limit>, use load_balance otherwise use classical
602 index_type nnz_limit = nvidia_nnz_limit;
603 index_type row_len_limit = nvidia_row_len_limit;
604 if (strategy_name_ == "intel") {
605 nnz_limit = intel_nnz_limit;
606 row_len_limit = intel_row_len_limit;
607 }
608#if GINKGO_HIP_PLATFORM_HCC
609 if (!cuda_strategy_) {
610 nnz_limit = amd_nnz_limit;
611 row_len_limit = amd_row_len_limit;
612 }
613#endif // GINKGO_HIP_PLATFORM_HCC
614 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
615 const bool is_mtx_on_host{host_mtx_exec ==
616 mtx_row_ptrs.get_executor()};
617 array<index_type> row_ptrs_host(host_mtx_exec);
618 const index_type* row_ptrs{};
619 if (is_mtx_on_host) {
620 row_ptrs = mtx_row_ptrs.get_const_data();
621 } else {
622 row_ptrs_host = mtx_row_ptrs;
623 row_ptrs = row_ptrs_host.get_const_data();
624 }
625 const auto num_rows = mtx_row_ptrs.get_size() - 1;
626 if (row_ptrs[num_rows] > nnz_limit) {
627 load_balance actual_strategy(nwarps_, warp_size_,
628 cuda_strategy_, strategy_name_);
629 if (is_mtx_on_host) {
630 actual_strategy.process(mtx_row_ptrs, mtx_srow);
631 } else {
632 actual_strategy.process(row_ptrs_host, mtx_srow);
633 }
634 this->set_name(actual_strategy.get_name());
635 } else {
636 index_type maxnum = 0;
637 for (size_type i = 0; i < num_rows; i++) {
638 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
639 }
640 if (maxnum > row_len_limit) {
641 load_balance actual_strategy(
642 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
643 if (is_mtx_on_host) {
644 actual_strategy.process(mtx_row_ptrs, mtx_srow);
645 } else {
646 actual_strategy.process(row_ptrs_host, mtx_srow);
647 }
648 this->set_name(actual_strategy.get_name());
649 } else {
650 classical actual_strategy;
651 if (is_mtx_on_host) {
652 actual_strategy.process(mtx_row_ptrs, mtx_srow);
653 max_length_per_row_ =
654 actual_strategy.get_max_length_per_row();
655 } else {
656 actual_strategy.process(row_ptrs_host, mtx_srow);
657 max_length_per_row_ =
658 actual_strategy.get_max_length_per_row();
659 }
660 this->set_name(actual_strategy.get_name());
661 }
662 }
663 }
664
665 int64_t clac_size(const int64_t nnz) override
666 {
667 return std::make_shared<load_balance>(
668 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
669 ->clac_size(nnz);
670 }
671
672 index_type get_max_length_per_row() const noexcept
673 {
674 return max_length_per_row_;
675 }
676
677 std::shared_ptr<strategy_type> copy() override
678 {
679 return std::make_shared<automatical>(
680 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
681 }
682
683 private:
684 int64_t nwarps_;
685 int warp_size_;
686 bool cuda_strategy_;
687 std::string strategy_name_;
688 index_type max_length_per_row_;
689 };
690
691 friend class Csr<next_precision<ValueType>, IndexType>;
692
693 void convert_to(
694 Csr<next_precision<ValueType>, IndexType>* result) const override;
695
696 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
697
698 void convert_to(Dense<ValueType>* other) const override;
699
700 void move_to(Dense<ValueType>* other) override;
701
702 void convert_to(Coo<ValueType, IndexType>* result) const override;
703
704 void move_to(Coo<ValueType, IndexType>* result) override;
705
706 void convert_to(Ell<ValueType, IndexType>* result) const override;
707
708 void move_to(Ell<ValueType, IndexType>* result) override;
709
710 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
711
712 void move_to(Fbcsr<ValueType, IndexType>* result) override;
713
714 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
715
716 void move_to(Hybrid<ValueType, IndexType>* result) override;
717
718 void convert_to(Sellp<ValueType, IndexType>* result) const override;
719
720 void move_to(Sellp<ValueType, IndexType>* result) override;
721
722 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
723
724 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
725
726 void read(const mat_data& data) override;
727
728 void read(const device_mat_data& data) override;
729
730 void read(device_mat_data&& data) override;
731
732 void write(mat_data& data) const override;
733
734 std::unique_ptr<LinOp> transpose() const override;
735
736 std::unique_ptr<LinOp> conj_transpose() const override;
737
752 std::unique_ptr<Csr> permute(
753 ptr_param<const Permutation<index_type>> permutation,
755
769 std::unique_ptr<Csr> permute(
770 ptr_param<const Permutation<index_type>> row_permutation,
771 ptr_param<const Permutation<index_type>> column_permutation,
772 bool invert = false) const;
773
783 std::unique_ptr<Csr> scale_permute(
786
799 std::unique_ptr<Csr> scale_permute(
801 row_permutation,
803 column_permutation,
804 bool invert = false) const;
805
806 std::unique_ptr<LinOp> permute(
807 const array<IndexType>* permutation_indices) const override;
808
809 std::unique_ptr<LinOp> inverse_permute(
810 const array<IndexType>* inverse_permutation_indices) const override;
811
812 std::unique_ptr<LinOp> row_permute(
813 const array<IndexType>* permutation_indices) const override;
814
815 std::unique_ptr<LinOp> column_permute(
816 const array<IndexType>* permutation_indices) const override;
817
818 std::unique_ptr<LinOp> inverse_row_permute(
819 const array<IndexType>* inverse_permutation_indices) const override;
820
821 std::unique_ptr<LinOp> inverse_column_permute(
822 const array<IndexType>* inverse_permutation_indices) const override;
823
824 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
825
826 std::unique_ptr<absolute_type> compute_absolute() const override;
827
829
834
835 /*
836 * Tests if all row entry pairs (value, col_idx) are sorted by column index
837 *
838 * @returns True if all row entry pairs (value, col_idx) are sorted by
839 * column index
840 */
841 bool is_sorted_by_column_index() const;
842
848 value_type* get_values() noexcept { return values_.get_data(); }
849
857 const value_type* get_const_values() const noexcept
858 {
859 return values_.get_const_data();
860 }
861
867 index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
868
876 const index_type* get_const_col_idxs() const noexcept
877 {
878 return col_idxs_.get_const_data();
879 }
880
886 index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
887
895 const index_type* get_const_row_ptrs() const noexcept
896 {
897 return row_ptrs_.get_const_data();
898 }
899
905 index_type* get_srow() noexcept { return srow_.get_data(); }
906
914 const index_type* get_const_srow() const noexcept
915 {
916 return srow_.get_const_data();
917 }
918
925 {
926 return srow_.get_size();
927 }
928
935 {
936 return values_.get_size();
937 }
938
943 std::shared_ptr<strategy_type> get_strategy() const noexcept
944 {
945 return strategy_;
946 }
947
953 void set_strategy(std::shared_ptr<strategy_type> strategy)
954 {
955 strategy_ = std::move(strategy->copy());
956 this->make_srow();
957 }
958
966 {
967 auto exec = this->get_executor();
968 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
969 this->scale_impl(make_temporary_clone(exec, alpha).get());
970 }
971
979 {
980 auto exec = this->get_executor();
981 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
982 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
983 }
984
993 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
994 std::shared_ptr<strategy_type> strategy);
995
1007 static std::unique_ptr<Csr> create(
1008 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1009 size_type num_nonzeros = {},
1010 std::shared_ptr<strategy_type> strategy = nullptr);
1011
1031 static std::unique_ptr<Csr> create(
1032 std::shared_ptr<const Executor> exec, const dim<2>& size,
1033 array<value_type> values, array<index_type> col_idxs,
1034 array<index_type> row_ptrs,
1035 std::shared_ptr<strategy_type> strategy = nullptr);
1036
1041 template <typename InputValueType, typename InputColumnIndexType,
1042 typename InputRowPtrType>
1043 GKO_DEPRECATED(
1044 "explicitly construct the gko::array argument instead of passing "
1045 "initializer lists")
1046 static std::unique_ptr<Csr> create(
1047 std::shared_ptr<const Executor> exec, const dim<2>& size,
1048 std::initializer_list<InputValueType> values,
1049 std::initializer_list<InputColumnIndexType> col_idxs,
1050 std::initializer_list<InputRowPtrType> row_ptrs)
1051 {
1052 return create(exec, size, array<value_type>{exec, std::move(values)},
1053 array<index_type>{exec, std::move(col_idxs)},
1054 array<index_type>{exec, std::move(row_ptrs)});
1055 }
1056
1072 static std::unique_ptr<const Csr> create_const(
1073 std::shared_ptr<const Executor> exec, const dim<2>& size,
1074 gko::detail::const_array_view<ValueType>&& values,
1075 gko::detail::const_array_view<IndexType>&& col_idxs,
1076 gko::detail::const_array_view<IndexType>&& row_ptrs,
1077 std::shared_ptr<strategy_type> strategy = nullptr);
1078
1091 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1092 const index_set<IndexType>& row_index_set,
1093 const index_set<IndexType>& column_index_set) const;
1094
1106 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1107 const span& row_span, const span& column_span) const;
1108
1113
1120
1124 Csr(const Csr&);
1125
1132
1133protected:
1134 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1135 size_type num_nonzeros = {},
1136 std::shared_ptr<strategy_type> strategy = nullptr);
1137
1138 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1139 array<value_type> values, array<index_type> col_idxs,
1140 array<index_type> row_ptrs,
1141 std::shared_ptr<strategy_type> strategy = nullptr);
1142
1143 void apply_impl(const LinOp* b, LinOp* x) const override;
1144
1145 void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1146 LinOp* x) const override;
1147
1148 // TODO: This provides some more sane settings. Please fix this!
1149 static std::shared_ptr<strategy_type> make_default_strategy(
1150 std::shared_ptr<const Executor> exec)
1151 {
1152 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1153 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1154 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1155 std::shared_ptr<strategy_type> new_strategy;
1156 if (cuda_exec) {
1157 new_strategy = std::make_shared<automatical>(cuda_exec);
1158 } else if (hip_exec) {
1159 new_strategy = std::make_shared<automatical>(hip_exec);
1160 } else if (dpcpp_exec) {
1161 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1162 } else {
1163 new_strategy = std::make_shared<classical>();
1164 }
1165 return new_strategy;
1166 }
1167
1168 // TODO clean this up as soon as we improve strategy_type
1169 template <typename CsrType>
1170 void convert_strategy_helper(CsrType* result) const
1171 {
1172 auto strat = this->get_strategy().get();
1173 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1174 if (dynamic_cast<classical*>(strat)) {
1175 new_strat = std::make_shared<typename CsrType::classical>();
1176 } else if (dynamic_cast<merge_path*>(strat)) {
1177 new_strat = std::make_shared<typename CsrType::merge_path>();
1178 } else if (dynamic_cast<cusparse*>(strat)) {
1179 new_strat = std::make_shared<typename CsrType::cusparse>();
1180 } else if (dynamic_cast<sparselib*>(strat)) {
1181 new_strat = std::make_shared<typename CsrType::sparselib>();
1182 } else {
1183 auto rexec = result->get_executor();
1184 auto cuda_exec =
1185 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1186 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1187 auto dpcpp_exec =
1188 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1189 auto lb = dynamic_cast<load_balance*>(strat);
1190 if (cuda_exec) {
1191 if (lb) {
1192 new_strat =
1193 std::make_shared<typename CsrType::load_balance>(
1194 cuda_exec);
1195 } else {
1196 new_strat = std::make_shared<typename CsrType::automatical>(
1197 cuda_exec);
1198 }
1199 } else if (hip_exec) {
1200 if (lb) {
1201 new_strat =
1202 std::make_shared<typename CsrType::load_balance>(
1203 hip_exec);
1204 } else {
1205 new_strat = std::make_shared<typename CsrType::automatical>(
1206 hip_exec);
1207 }
1208 } else if (dpcpp_exec) {
1209 if (lb) {
1210 new_strat =
1211 std::make_shared<typename CsrType::load_balance>(
1212 dpcpp_exec);
1213 } else {
1214 new_strat = std::make_shared<typename CsrType::automatical>(
1215 dpcpp_exec);
1216 }
1217 } else {
1218 // Try to preserve this executor's configuration
1219 auto this_cuda_exec =
1220 std::dynamic_pointer_cast<const CudaExecutor>(
1221 this->get_executor());
1222 auto this_hip_exec =
1223 std::dynamic_pointer_cast<const HipExecutor>(
1224 this->get_executor());
1225 auto this_dpcpp_exec =
1226 std::dynamic_pointer_cast<const DpcppExecutor>(
1227 this->get_executor());
1228 if (this_cuda_exec) {
1229 if (lb) {
1230 new_strat =
1231 std::make_shared<typename CsrType::load_balance>(
1232 this_cuda_exec);
1233 } else {
1234 new_strat =
1235 std::make_shared<typename CsrType::automatical>(
1236 this_cuda_exec);
1237 }
1238 } else if (this_hip_exec) {
1239 if (lb) {
1240 new_strat =
1241 std::make_shared<typename CsrType::load_balance>(
1242 this_hip_exec);
1243 } else {
1244 new_strat =
1245 std::make_shared<typename CsrType::automatical>(
1246 this_hip_exec);
1247 }
1248 } else if (this_dpcpp_exec) {
1249 if (lb) {
1250 new_strat =
1251 std::make_shared<typename CsrType::load_balance>(
1252 this_dpcpp_exec);
1253 } else {
1254 new_strat =
1255 std::make_shared<typename CsrType::automatical>(
1256 this_dpcpp_exec);
1257 }
1258 } else {
1259 // FIXME: this changes strategies.
1260 // We had a load balance or automatical strategy from a non
1261 // HIP or Cuda executor and are moving to a non HIP or Cuda
1262 // executor.
1263 new_strat = std::make_shared<typename CsrType::classical>();
1264 }
1265 }
1266 }
1267 result->set_strategy(new_strat);
1268 }
1269
1273 void make_srow()
1274 {
1275 srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1276 strategy_->process(row_ptrs_, &srow_);
1277 }
1278
1285 virtual void scale_impl(const LinOp* alpha);
1286
1293 virtual void inv_scale_impl(const LinOp* alpha);
1294
1295private:
1296 std::shared_ptr<strategy_type> strategy_;
1297 array<value_type> values_;
1298 array<index_type> col_idxs_;
1299 array<index_type> row_ptrs_;
1300 array<index_type> srow_;
1301
1302 void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1303};
1304
1305
1306namespace detail {
1307
1308
1315template <typename ValueType, typename IndexType>
1316void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1317{
1318 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1319 using automatical = typename Csr<ValueType, IndexType>::automatical;
1320 auto strategy = result->get_strategy();
1321 auto executor = result->get_executor();
1322 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1323 if (auto exec =
1324 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1325 result->set_strategy(std::make_shared<load_balance>(exec));
1326 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1327 executor)) {
1328 result->set_strategy(std::make_shared<load_balance>(exec));
1329 }
1330 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1331 if (auto exec =
1332 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1333 result->set_strategy(std::make_shared<automatical>(exec));
1334 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1335 executor)) {
1336 result->set_strategy(std::make_shared<automatical>(exec));
1337 }
1338 }
1339}
1340
1341
1342} // namespace detail
1343} // namespace matrix
1344} // namespace gko
1345
1346
1347#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition polymorphic_object.hpp:471
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1485
The diagonal of a LinOp implementing this interface can be extracted.
Definition lin_op.hpp:744
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:795
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:880
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:663
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:616
Definition lin_op.hpp:118
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1338
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:485
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:235
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:606
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:819
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:434
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:661
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition logger.hpp:25
void resize_and_reset(size_type size)
Resizes the array so it is able to hold the specified number of elements.
Definition array.hpp:623
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:674
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:690
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:683
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:657
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:57
COO stores a matrix in the coordinate matrix format.
Definition ell.hpp:21
Definition csr.hpp:49
Definition csr.hpp:512
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:677
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:585
automatical()
Creates an automatical strategy.
Definition csr.hpp:540
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:665
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:550
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:596
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with DPCPP executor.
Definition csr.hpp:570
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:559
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:223
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:230
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:259
classical()
Creates a classical strategy.
Definition csr.hpp:228
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:252
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:298
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:309
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:311
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:303
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:305
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:344
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:407
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:499
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:371
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:352
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:460
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:397
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:362
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:382
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:273
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:284
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:286
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:278
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:280
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:322
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:333
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:329
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:327
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:335
strategy_type decides how the CSR algorithm is set up.
Definition csr.hpp:166
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:184
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:175
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:22
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the column permutation of the Permutable object.
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:895
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
void read(device_mat_data &&data) override
Reads a matrix from a device_matrix_data structure.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:914
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:953
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:978
void read(const device_mat_data &data) override
Reads a matrix from a device_matrix_data structure.
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:905
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix with the specified strategy.
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps).
Definition csr.hpp:924
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the symmetric inverse row and column permutation of the Permutable objec...
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the row permutation of the Permutable object.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:886
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation.
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:857
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the column permutation of the inverse permuted object.
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:934
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:943
std::unique_ptr< LinOp > permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the symmetric row and column permutation of the Permutable object.
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:876
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:965
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:848
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:867
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition sparsity_csr.hpp:26
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:50
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:60
Fixed-block compressed sparse row storage matrix format.
Definition sparsity_csr.hpp:30
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:52
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:113
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:39
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:51
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:57
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:43
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:43
@ symmetric
The rows and columns will be permuted.
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtains the type with the complex part removed from a complex/scalar type, or from the template parameter of a class b...
Definition math.hpp:326
typename detail::next_precision_impl< T >::type next_precision
Obtains the next type in the singly-linked precision list.
Definition math.hpp:462
typename detail::to_complex_s< T >::type to_complex
Obtains the type with complex added to a complex/scalar type, or to the template parameter of a class by a...
Definition math.hpp:345
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:613
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:86
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:863
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:209
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:27
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:127
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:47