Ginkgo Generated from branch based on main. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
21template <typename ValueType>
22class Dense;
23
24template <typename ValueType>
25class Diagonal;
26
27template <typename ValueType, typename IndexType>
28class Coo;
29
30template <typename ValueType, typename IndexType>
31class Ell;
32
33template <typename ValueType, typename IndexType>
34class Hybrid;
35
36template <typename ValueType, typename IndexType>
37class Sellp;
38
39template <typename ValueType, typename IndexType>
40class SparsityCsr;
41
42template <typename ValueType, typename IndexType>
43class Csr;
44
45template <typename ValueType, typename IndexType>
46class Fbcsr;
47
48template <typename ValueType, typename IndexType>
50
51template <typename IndexType>
52class Permutation;
53
54
55namespace detail {
56
57
58template <typename ValueType = default_precision, typename IndexType = int32>
59void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60
61
62} // namespace detail
63
64
103template <typename ValueType = default_precision, typename IndexType = int32>
104class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
105 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107 public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
108#endif
109#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110 public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
111#endif
112 public ConvertibleTo<Dense<ValueType>>,
113 public ConvertibleTo<Coo<ValueType, IndexType>>,
114 public ConvertibleTo<Ell<ValueType, IndexType>>,
115 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
116 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
117 public ConvertibleTo<Sellp<ValueType, IndexType>>,
118 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
119 public DiagonalExtractable<ValueType>,
120 public ReadableFromMatrixData<ValueType, IndexType>,
121 public WritableToMatrixData<ValueType, IndexType>,
122 public Transposable,
123 public Permutable<IndexType>,
125 remove_complex<Csr<ValueType, IndexType>>>,
126 public ScaledIdentityAddable {
127 friend class EnablePolymorphicObject<Csr, LinOp>;
128 friend class Coo<ValueType, IndexType>;
129 friend class Dense<ValueType>;
130 friend class Diagonal<ValueType>;
131 friend class Ell<ValueType, IndexType>;
132 friend class Hybrid<ValueType, IndexType>;
133 friend class Sellp<ValueType, IndexType>;
134 friend class SparsityCsr<ValueType, IndexType>;
135 friend class Fbcsr<ValueType, IndexType>;
136 friend class CsrBuilder<ValueType, IndexType>;
137 friend class Csr<to_complex<ValueType>, IndexType>;
138
139public:
140 using EnableLinOp<Csr>::convert_to;
141 using EnableLinOp<Csr>::move_to;
142 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
143 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
144 using ConvertibleTo<Dense<ValueType>>::convert_to;
145 using ConvertibleTo<Dense<ValueType>>::move_to;
146 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
148 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
158 using ReadableFromMatrixData<ValueType, IndexType>::read;
159
160 using value_type = ValueType;
161 using index_type = IndexType;
162 using transposed_type = Csr<ValueType, IndexType>;
163 using mat_data = matrix_data<ValueType, IndexType>;
164 using device_mat_data = device_matrix_data<ValueType, IndexType>;
165 using absolute_type = remove_complex<Csr>;
166
167 class automatical;
168
176 friend class automatical;
177
    public:
        /**
         * Creates a strategy with the given name.
         *
         * @param name  the name of the strategy
         */
        strategy_type(std::string name) : name_(name) {}

        virtual ~strategy_type() = default;

        /**
         * Returns the name of the strategy.
         *
         * @return the name of the strategy
         */
        std::string get_name() { return name_; }

        /**
         * Computes the srow (starting-row) array from the row pointers.
         *
         * @param mtx_row_ptrs  the row pointers of the matrix
         * @param mtx_srow  the srow array to fill
         */
        virtual void process(const array<index_type>& mtx_row_ptrs,
                             array<index_type>* mtx_srow) = 0;

        /**
         * Computes the srow size from the number of stored elements.
         *
         * NOTE(review): "clac_size" looks like a typo of "calc_size", but
         * it is part of the public interface, so renaming would break
         * external implementations of this strategy.
         *
         * @param nnz  the number of stored elements of the matrix
         *
         * @return the required size of the srow array
         */
        virtual int64_t clac_size(const int64_t nnz) = 0;

        /**
         * Copies the strategy.
         *
         * @return a shared pointer to the copied strategy
         */
        virtual std::shared_ptr<strategy_type> copy() = 0;

    protected:
        // Allows derived strategies (e.g. automatical) to report which
        // concrete strategy was actually selected by process().
        void set_name(std::string name) { name_ = name; }

    private:
        std::string name_;
    };
225
232 class classical : public strategy_type {
233 public:
237 classical() : strategy_type("classical"), max_length_per_row_(0) {}
238
239 void process(const array<index_type>& mtx_row_ptrs,
240 array<index_type>* mtx_srow) override
241 {
242 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
243 array<index_type> row_ptrs_host(host_mtx_exec);
244 const bool is_mtx_on_host{host_mtx_exec ==
245 mtx_row_ptrs.get_executor()};
246 const index_type* row_ptrs{};
247 if (is_mtx_on_host) {
248 row_ptrs = mtx_row_ptrs.get_const_data();
249 } else {
250 row_ptrs_host = mtx_row_ptrs;
251 row_ptrs = row_ptrs_host.get_const_data();
252 }
253 auto num_rows = mtx_row_ptrs.get_size() - 1;
254 max_length_per_row_ = 0;
255 for (size_type i = 0; i < num_rows; i++) {
256 max_length_per_row_ = std::max(max_length_per_row_,
257 row_ptrs[i + 1] - row_ptrs[i]);
258 }
259 }
260
261 int64_t clac_size(const int64_t nnz) override { return 0; }
262
263 index_type get_max_length_per_row() const noexcept
264 {
265 return max_length_per_row_;
266 }
267
268 std::shared_ptr<strategy_type> copy() override
269 {
270 return std::make_shared<classical>();
271 }
272
273 private:
274 index_type max_length_per_row_;
275 };
276
282 class merge_path : public strategy_type {
283 public:
287 merge_path() : strategy_type("merge_path") {}
288
289 void process(const array<index_type>& mtx_row_ptrs,
290 array<index_type>* mtx_srow) override
291 {}
292
293 int64_t clac_size(const int64_t nnz) override { return 0; }
294
295 std::shared_ptr<strategy_type> copy() override
296 {
297 return std::make_shared<merge_path>();
298 }
299 };
300
307 class cusparse : public strategy_type {
308 public:
312 cusparse() : strategy_type("cusparse") {}
313
314 void process(const array<index_type>& mtx_row_ptrs,
315 array<index_type>* mtx_srow) override
316 {}
317
318 int64_t clac_size(const int64_t nnz) override { return 0; }
319
320 std::shared_ptr<strategy_type> copy() override
321 {
322 return std::make_shared<cusparse>();
323 }
324 };
325
331 class sparselib : public strategy_type {
332 public:
336 sparselib() : strategy_type("sparselib") {}
337
338 void process(const array<index_type>& mtx_row_ptrs,
339 array<index_type>* mtx_srow) override
340 {}
341
342 int64_t clac_size(const int64_t nnz) override { return 0; }
343
344 std::shared_ptr<strategy_type> copy() override
345 {
346 return std::make_shared<sparselib>();
347 }
348 };
349
354 public:
361 [[deprecated]] load_balance()
362 : load_balance(std::move(
364 {}
365
        /**
         * Creates a load_balance strategy with a CUDA executor.
         *
         * @param exec  the CUDA executor; supplies the number of warps and
         *              the warp size
         */
        load_balance(std::shared_ptr<const CudaExecutor> exec)
            : load_balance(exec->get_num_warps(), exec->get_warp_size())
        {}

        /**
         * Creates a load_balance strategy with a HIP executor
         * (cuda_strategy is set to false).
         *
         * @param exec  the HIP executor
         */
        load_balance(std::shared_ptr<const HipExecutor> exec)
            : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
        {}

        /**
         * Creates a load_balance strategy with a DPC++ executor.
         *
         * NOTE(review): the subgroup size is hard-coded to 32 here —
         * confirm this matches the target device's subgroup size.
         *
         * @param exec  the DPC++ executor
         */
        load_balance(std::shared_ptr<const DpcppExecutor> exec)
            : load_balance(exec->get_num_subgroups(), 32, false, "intel")
        {}

        /**
         * Creates a load_balance strategy with specified parameters.
         *
         * @param nwarps  the number of warps (or subgroups) in the executor
         * @param warp_size  the warp/subgroup size of the executor
         * @param cuda_strategy  whether the CUDA tuning should be used
         * @param strategy_name  the tuning name; "intel" selects the DPC++
         *                       tuning in clac_size()
         */
        load_balance(int64_t nwarps, int warp_size = 32,
                     bool cuda_strategy = true,
                     std::string strategy_name = "none")
            : strategy_type("load_balance"),
              nwarps_(nwarps),
              warp_size_(warp_size),
              cuda_strategy_(cuda_strategy),
              strategy_name_(strategy_name)
        {}
415
        /**
         * Computes srow from the row pointers: srow[i] becomes the number
         * of rows whose warp-sized chunks fall before bucket i, i.e. the
         * starting row for warp i (prefix-summed bucket histogram).
         *
         * @param mtx_row_ptrs  the row pointers of the matrix
         * @param mtx_srow  the srow array to fill (its size determines the
         *                  number of buckets/warps)
         */
        void process(const array<index_type>& mtx_row_ptrs,
                     array<index_type>* mtx_srow) override
        {
            auto nwarps = mtx_srow->get_size();

            if (nwarps > 0) {
                // Work on host views/copies; either array may live on a
                // device executor.
                auto host_srow_exec = mtx_srow->get_executor()->get_master();
                auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
                const bool is_srow_on_host{host_srow_exec ==
                                           mtx_srow->get_executor()};
                const bool is_mtx_on_host{host_mtx_exec ==
                                          mtx_row_ptrs.get_executor()};
                array<index_type> row_ptrs_host(host_mtx_exec);
                array<index_type> srow_host(host_srow_exec);
                const index_type* row_ptrs{};
                index_type* srow{};
                if (is_srow_on_host) {
                    srow = mtx_srow->get_data();
                } else {
                    srow_host = *mtx_srow;
                    srow = srow_host.get_data();
                }
                if (is_mtx_on_host) {
                    row_ptrs = mtx_row_ptrs.get_const_data();
                } else {
                    row_ptrs_host = mtx_row_ptrs;
                    row_ptrs = row_ptrs_host.get_const_data();
                }
                // Zero the bucket histogram.
                for (size_type i = 0; i < nwarps; i++) {
                    srow[i] = 0;
                }
                const auto num_rows = mtx_row_ptrs.get_size() - 1;
                const auto num_elems = row_ptrs[num_rows];
                // Guard against division by zero for empty matrices.
                const auto bucket_divider =
                    num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
                // Count how many rows end in each bucket.
                for (size_type i = 0; i < num_rows; i++) {
                    auto bucket =
                        ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
                                bucket_divider);
                    if (bucket < nwarps) {
                        srow[bucket]++;
                    }
                }
                // find starting row for thread i
                for (size_type i = 1; i < nwarps; i++) {
                    srow[i] += srow[i - 1];
                }
                // Copy the result back if srow lives on a device executor.
                if (!is_srow_on_host) {
                    *mtx_srow = srow_host;
                }
            }
        }
468
        /**
         * Computes the srow size from the number of stored elements:
         * min(ceil(nnz / warp_size), nwarps * multiple), where `multiple`
         * grows with nnz and is tuned per hardware family.
         *
         * @param nnz  the number of stored elements of the matrix
         *
         * @return the size of srow (0 if warp_size_ is not positive)
         */
        int64_t clac_size(const int64_t nnz) override
        {
            if (warp_size_ > 0) {
                // Default (CUDA) tuning: larger problems get more buckets.
                int multiple = 8;
                if (nnz >= static_cast<int64_t>(2e8)) {
                    multiple = 2048;
                } else if (nnz >= static_cast<int64_t>(2e7)) {
                    multiple = 512;
                } else if (nnz >= static_cast<int64_t>(2e6)) {
                    multiple = 128;
                } else if (nnz >= static_cast<int64_t>(2e5)) {
                    multiple = 32;
                }
                // Intel (DPC++) tuning uses smaller multipliers.
                if (strategy_name_ == "intel") {
                    multiple = 8;
                    if (nnz >= static_cast<int64_t>(2e8)) {
                        multiple = 256;
                    } else if (nnz >= static_cast<int64_t>(2e7)) {
                        multiple = 32;
                    }
                }
#if GINKGO_HIP_PLATFORM_HCC
                // AMD (HCC) tuning also uses smaller multipliers.
                if (!cuda_strategy_) {
                    multiple = 8;
                    if (nnz >= static_cast<int64_t>(1e7)) {
                        multiple = 64;
                    } else if (nnz >= static_cast<int64_t>(1e6)) {
                        multiple = 16;
                    }
                }
#endif  // GINKGO_HIP_PLATFORM_HCC

                auto nwarps = nwarps_ * multiple;
                // Never use more warps than there are warp-sized chunks.
                return min(ceildiv(nnz, warp_size_), nwarps);
            } else {
                return 0;
            }
        }
507
        std::shared_ptr<strategy_type> copy() override
        {
            return std::make_shared<load_balance>(
                nwarps_, warp_size_, cuda_strategy_, strategy_name_);
        }

    private:
        int64_t nwarps_;             // number of warps/subgroups available
        int warp_size_;              // threads per warp/subgroup
        bool cuda_strategy_;         // false selects the non-CUDA tuning
        std::string strategy_name_;  // "intel" selects the DPC++ tuning
    };
520
    class automatical : public strategy_type {
    public:
        /* Use imbalance strategy when the maximum number of nonzeros per row
         * is more than 1024 on NVIDIA hardware */
        const index_type nvidia_row_len_limit = 1024;
        /* Use imbalance strategy when the matrix has more than 1e6 stored
         * elements on NVIDIA hardware */
        const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
        /* Use imbalance strategy when the maximum number of nonzeros per row
         * is more than 768 on AMD hardware */
        const index_type amd_row_len_limit = 768;
        /* Use imbalance strategy when the matrix has more than 1e8 stored
         * elements on AMD hardware */
        const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
        /* Use imbalance strategy when the maximum number of nonzeros per row
         * is more than 25600 on Intel hardware */
        const index_type intel_row_len_limit = 25600;
        /* Use imbalance strategy when the matrix has more than 3e8 stored
         * elements on Intel hardware */
        const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
541
542 public:
549 [[deprecated]] automatical()
550 : automatical(std::move(
552 {}
553
        /**
         * Creates an automatical strategy with a CUDA executor.
         *
         * @param exec  the CUDA executor; supplies the number of warps and
         *              the warp size
         */
        automatical(std::shared_ptr<const CudaExecutor> exec)
            : automatical(exec->get_num_warps(), exec->get_warp_size())
        {}

        /**
         * Creates an automatical strategy with a HIP executor
         * (cuda_strategy is set to false).
         *
         * @param exec  the HIP executor
         */
        automatical(std::shared_ptr<const HipExecutor> exec)
            : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
        {}

        /**
         * Creates an automatical strategy with a DPC++ executor.
         *
         * NOTE(review): the subgroup size is hard-coded to 32 here —
         * confirm this matches the target device's subgroup size.
         *
         * @param exec  the DPC++ executor
         */
        automatical(std::shared_ptr<const DpcppExecutor> exec)
            : automatical(exec->get_num_subgroups(), 32, false, "intel")
        {}

        /**
         * Creates an automatical strategy with specified parameters.
         *
         * @param nwarps  the number of warps (or subgroups) in the executor
         * @param warp_size  the warp/subgroup size of the executor
         * @param cuda_strategy  whether the CUDA tuning should be used
         * @param strategy_name  the tuning name; "intel" selects the Intel
         *                       nnz/row-length limits
         */
        automatical(int64_t nwarps, int warp_size = 32,
                    bool cuda_strategy = true,
                    std::string strategy_name = "none")
            : strategy_type("automatical"),
              nwarps_(nwarps),
              warp_size_(warp_size),
              cuda_strategy_(cuda_strategy),
              strategy_name_(strategy_name),
              max_length_per_row_(0)
        {}
604
        /**
         * Computes srow, choosing the actual strategy on the fly:
         * load_balance when the matrix exceeds the hardware-specific nnz
         * or row-length limit, classical otherwise. The chosen strategy's
         * name is recorded via set_name().
         *
         * @param mtx_row_ptrs  the row pointers of the matrix
         * @param mtx_srow  the srow array to fill
         */
        void process(const array<index_type>& mtx_row_ptrs,
                     array<index_type>* mtx_srow) override
        {
            // if the number of stored elements is larger than <nnz_limit> or
            // the maximum number of stored elements per row is larger than
            // <row_len_limit>, use load_balance otherwise use classical
            index_type nnz_limit = nvidia_nnz_limit;
            index_type row_len_limit = nvidia_row_len_limit;
            if (strategy_name_ == "intel") {
                nnz_limit = intel_nnz_limit;
                row_len_limit = intel_row_len_limit;
            }
#if GINKGO_HIP_PLATFORM_HCC
            if (!cuda_strategy_) {
                nnz_limit = amd_nnz_limit;
                row_len_limit = amd_row_len_limit;
            }
#endif  // GINKGO_HIP_PLATFORM_HCC
            // Bring the row pointers to the host if necessary.
            auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
            const bool is_mtx_on_host{host_mtx_exec ==
                                      mtx_row_ptrs.get_executor()};
            array<index_type> row_ptrs_host(host_mtx_exec);
            const index_type* row_ptrs{};
            if (is_mtx_on_host) {
                row_ptrs = mtx_row_ptrs.get_const_data();
            } else {
                row_ptrs_host = mtx_row_ptrs;
                row_ptrs = row_ptrs_host.get_const_data();
            }
            const auto num_rows = mtx_row_ptrs.get_size() - 1;
            if (row_ptrs[num_rows] > nnz_limit) {
                // Too many stored elements overall: delegate to
                // load_balance, reusing the host copy when we made one.
                load_balance actual_strategy(nwarps_, warp_size_,
                                             cuda_strategy_, strategy_name_);
                if (is_mtx_on_host) {
                    actual_strategy.process(mtx_row_ptrs, mtx_srow);
                } else {
                    actual_strategy.process(row_ptrs_host, mtx_srow);
                }
                this->set_name(actual_strategy.get_name());
            } else {
                // Otherwise decide based on the longest row.
                index_type maxnum = 0;
                for (size_type i = 0; i < num_rows; i++) {
                    maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
                }
                if (maxnum > row_len_limit) {
                    load_balance actual_strategy(
                        nwarps_, warp_size_, cuda_strategy_, strategy_name_);
                    if (is_mtx_on_host) {
                        actual_strategy.process(mtx_row_ptrs, mtx_srow);
                    } else {
                        actual_strategy.process(row_ptrs_host, mtx_srow);
                    }
                    this->set_name(actual_strategy.get_name());
                } else {
                    // Short rows everywhere: classical; also cache its
                    // maximum row length for later queries.
                    classical actual_strategy;
                    if (is_mtx_on_host) {
                        actual_strategy.process(mtx_row_ptrs, mtx_srow);
                        max_length_per_row_ =
                            actual_strategy.get_max_length_per_row();
                    } else {
                        actual_strategy.process(row_ptrs_host, mtx_srow);
                        max_length_per_row_ =
                            actual_strategy.get_max_length_per_row();
                    }
                    this->set_name(actual_strategy.get_name());
                }
            }
        }
673
674 int64_t clac_size(const int64_t nnz) override
675 {
676 return std::make_shared<load_balance>(
677 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
678 ->clac_size(nnz);
679 }
680
        /**
         * Returns the maximum number of stored elements per row, as cached
         * by the last process() call that selected the classical strategy
         * (it stays 0 otherwise).
         *
         * @return the maximum number of stored elements per row
         */
        index_type get_max_length_per_row() const noexcept
        {
            return max_length_per_row_;
        }

        std::shared_ptr<strategy_type> copy() override
        {
            // NOTE(review): the copy starts from a fresh state:
            // max_length_per_row_ is not carried over, so the copy must
            // re-run process() before get_max_length_per_row() is valid.
            return std::make_shared<automatical>(
                nwarps_, warp_size_, cuda_strategy_, strategy_name_);
        }

    private:
        int64_t nwarps_;             // number of warps/subgroups available
        int warp_size_;              // threads per warp/subgroup
        bool cuda_strategy_;         // false selects the non-CUDA limits
        std::string strategy_name_;  // "intel" selects the Intel limits
        // Cached result of process() when classical was chosen.
        index_type max_length_per_row_;
    };
699
700 friend class Csr<previous_precision<ValueType>, IndexType>;
701
702 void convert_to(
703 Csr<next_precision<ValueType>, IndexType>* result) const override;
704
705 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
706
707#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
708 friend class Csr<previous_precision<ValueType, 2>, IndexType>;
709 using ConvertibleTo<
710 Csr<next_precision<ValueType, 2>, IndexType>>::convert_to;
711 using ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>::move_to;
712
713 void convert_to(
714 Csr<next_precision<ValueType, 2>, IndexType>* result) const override;
715
716 void move_to(Csr<next_precision<ValueType, 2>, IndexType>* result) override;
717#endif
718
719#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
720 friend class Csr<previous_precision<ValueType, 3>, IndexType>;
721 using ConvertibleTo<
722 Csr<next_precision<ValueType, 3>, IndexType>>::convert_to;
723 using ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>::move_to;
724
725 void convert_to(
726 Csr<next_precision<ValueType, 3>, IndexType>* result) const override;
727
728 void move_to(Csr<next_precision<ValueType, 3>, IndexType>* result) override;
729#endif
730
731 void convert_to(Dense<ValueType>* other) const override;
732
733 void move_to(Dense<ValueType>* other) override;
734
735 void convert_to(Coo<ValueType, IndexType>* result) const override;
736
737 void move_to(Coo<ValueType, IndexType>* result) override;
738
739 void convert_to(Ell<ValueType, IndexType>* result) const override;
740
741 void move_to(Ell<ValueType, IndexType>* result) override;
742
743 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
744
745 void move_to(Fbcsr<ValueType, IndexType>* result) override;
746
747 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
748
749 void move_to(Hybrid<ValueType, IndexType>* result) override;
750
751 void convert_to(Sellp<ValueType, IndexType>* result) const override;
752
753 void move_to(Sellp<ValueType, IndexType>* result) override;
754
755 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
756
757 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
758
759 void read(const mat_data& data) override;
760
761 void read(const device_mat_data& data) override;
762
763 void read(device_mat_data&& data) override;
764
765 void write(mat_data& data) const override;
766
767 std::unique_ptr<LinOp> transpose() const override;
768
769 std::unique_ptr<LinOp> conj_transpose() const override;
770
778
781 std::unique_ptr<Permutation<index_type>> value_permutation);
782
791 ptr_param<Csr> output) const;
792
793 std::unique_ptr<Permutation<IndexType>> value_permutation;
794 };
795
807 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
808 const;
809
824 std::unique_ptr<Csr> permute(
825 ptr_param<const Permutation<index_type>> permutation,
827
841 std::unique_ptr<Csr> permute(
842 ptr_param<const Permutation<index_type>> row_permutation,
843 ptr_param<const Permutation<index_type>> column_permutation,
844 bool invert = false) const;
845
866 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
867 ptr_param<const Permutation<index_type>> permutation,
869
888 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
889 ptr_param<const Permutation<index_type>> row_permutation,
890 ptr_param<const Permutation<index_type>> column_permutation,
891 bool invert = false) const;
892
902 std::unique_ptr<Csr> scale_permute(
905
918 std::unique_ptr<Csr> scale_permute(
920 row_permutation,
922 column_permutation,
923 bool invert = false) const;
924
925 std::unique_ptr<LinOp> permute(
926 const array<IndexType>* permutation_indices) const override;
927
928 std::unique_ptr<LinOp> inverse_permute(
929 const array<IndexType>* inverse_permutation_indices) const override;
930
931 std::unique_ptr<LinOp> row_permute(
932 const array<IndexType>* permutation_indices) const override;
933
934 std::unique_ptr<LinOp> column_permute(
935 const array<IndexType>* permutation_indices) const override;
936
937 std::unique_ptr<LinOp> inverse_row_permute(
938 const array<IndexType>* inverse_permutation_indices) const override;
939
940 std::unique_ptr<LinOp> inverse_column_permute(
941 const array<IndexType>* inverse_permutation_indices) const override;
942
943 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
944
945 std::unique_ptr<absolute_type> compute_absolute() const override;
946
948
953
954 /*
955 * Tests if all row entry pairs (value, col_idx) are sorted by column index
956 *
957 * @returns True if all row entry pairs (value, col_idx) are sorted by
958 * column index
959 */
960 bool is_sorted_by_column_index() const;
961
967 value_type* get_values() noexcept { return values_.get_data(); }
968
976 const value_type* get_const_values() const noexcept
977 {
978 return values_.get_const_data();
979 }
980
985 std::unique_ptr<Dense<ValueType>> create_value_view();
986
991 std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
992
998 index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
999
1007 const index_type* get_const_col_idxs() const noexcept
1008 {
1009 return col_idxs_.get_const_data();
1010 }
1011
1017 index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1018
1026 const index_type* get_const_row_ptrs() const noexcept
1027 {
1028 return row_ptrs_.get_const_data();
1029 }
1030
1036 index_type* get_srow() noexcept { return srow_.get_data(); }
1037
1045 const index_type* get_const_srow() const noexcept
1046 {
1047 return srow_.get_const_data();
1048 }
1049
1056 {
1057 return srow_.get_size();
1058 }
1059
1066 {
1067 return values_.get_size();
1068 }
1069
1074 std::shared_ptr<strategy_type> get_strategy() const noexcept
1075 {
1076 return strategy_;
1077 }
1078
1084 void set_strategy(std::shared_ptr<strategy_type> strategy)
1085 {
1086 strategy_ = std::move(strategy->copy());
1087 this->make_srow();
1088 }
1089
1097 {
1098 auto exec = this->get_executor();
1099 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1100 this->scale_impl(make_temporary_clone(exec, alpha).get());
1101 }
1102
1110 {
1111 auto exec = this->get_executor();
1112 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1113 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1114 }
1115
1124 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1125 std::shared_ptr<strategy_type> strategy);
1126
1138 static std::unique_ptr<Csr> create(
1139 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1140 size_type num_nonzeros = {},
1141 std::shared_ptr<strategy_type> strategy = nullptr);
1142
1162 static std::unique_ptr<Csr> create(
1163 std::shared_ptr<const Executor> exec, const dim<2>& size,
1164 array<value_type> values, array<index_type> col_idxs,
1165 array<index_type> row_ptrs,
1166 std::shared_ptr<strategy_type> strategy = nullptr);
1167
1172 template <typename InputValueType, typename InputColumnIndexType,
1173 typename InputRowPtrType>
1174 GKO_DEPRECATED(
1175 "explicitly construct the gko::array argument instead of passing "
1176 "initializer lists")
1177 static std::unique_ptr<Csr> create(
1178 std::shared_ptr<const Executor> exec, const dim<2>& size,
1179 std::initializer_list<InputValueType> values,
1180 std::initializer_list<InputColumnIndexType> col_idxs,
1181 std::initializer_list<InputRowPtrType> row_ptrs)
1182 {
1183 return create(exec, size, array<value_type>{exec, std::move(values)},
1184 array<index_type>{exec, std::move(col_idxs)},
1185 array<index_type>{exec, std::move(row_ptrs)});
1186 }
1187
1203 static std::unique_ptr<const Csr> create_const(
1204 std::shared_ptr<const Executor> exec, const dim<2>& size,
1205 gko::detail::const_array_view<ValueType>&& values,
1206 gko::detail::const_array_view<IndexType>&& col_idxs,
1207 gko::detail::const_array_view<IndexType>&& row_ptrs,
1208 std::shared_ptr<strategy_type> strategy = nullptr);
1209
1222 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1223 const index_set<IndexType>& row_index_set,
1224 const index_set<IndexType>& column_index_set) const;
1225
1237 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1238 const span& row_span, const span& column_span) const;
1239
1244
1251
1255 Csr(const Csr&);
1256
1263
protected:
    // Creates an uninitialized CSR matrix of the given size with room for
    // num_nonzeros stored elements. NOTE(review): a null strategy
    // presumably selects a default one — confirm against the definition
    // (see make_default_strategy()).
    Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
        size_type num_nonzeros = {},
        std::shared_ptr<strategy_type> strategy = nullptr);

    // Creates a CSR matrix taking ownership of existing value, column-index
    // and row-pointer arrays.
    Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
        array<value_type> values, array<index_type> col_idxs,
        array<index_type> row_ptrs,
        std::shared_ptr<strategy_type> strategy = nullptr);

    // LinOp::apply backend: applies this matrix to b, writing into x.
    void apply_impl(const LinOp* b, LinOp* x) const override;

    // LinOp::apply backend for the scaled variant
    // (x = alpha * A * b + beta * x).
    void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
                    LinOp* x) const override;
1278
1279 // TODO: This provides some more sane settings. Please fix this!
1280 static std::shared_ptr<strategy_type> make_default_strategy(
1281 std::shared_ptr<const Executor> exec)
1282 {
1283 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1284 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1285 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1286 std::shared_ptr<strategy_type> new_strategy;
1287 if (cuda_exec) {
1288 new_strategy = std::make_shared<automatical>(cuda_exec);
1289 } else if (hip_exec) {
1290 new_strategy = std::make_shared<automatical>(hip_exec);
1291 } else if (dpcpp_exec) {
1292 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1293 } else {
1294 new_strategy = std::make_shared<classical>();
1295 }
1296 return new_strategy;
1297 }
1298
    // TODO clean this up as soon as we improve strategy_type
    /**
     * Translates this matrix's strategy into an equivalent strategy for a
     * CSR type with possibly different template parameters, living on the
     * result's executor.
     *
     * The simple strategies (classical, merge_path, cusparse, sparselib)
     * map one-to-one. load_balance and automatical need executor
     * parameters, taken from the result's executor if it is CUDA/HIP/DPC++,
     * falling back to this matrix's executor, and finally to classical.
     *
     * @param result  the matrix whose strategy is set
     */
    template <typename CsrType>
    void convert_strategy_helper(CsrType* result) const
    {
        auto strat = this->get_strategy().get();
        std::shared_ptr<typename CsrType::strategy_type> new_strat;
        if (dynamic_cast<classical*>(strat)) {
            new_strat = std::make_shared<typename CsrType::classical>();
        } else if (dynamic_cast<merge_path*>(strat)) {
            new_strat = std::make_shared<typename CsrType::merge_path>();
        } else if (dynamic_cast<cusparse*>(strat)) {
            new_strat = std::make_shared<typename CsrType::cusparse>();
        } else if (dynamic_cast<sparselib*>(strat)) {
            new_strat = std::make_shared<typename CsrType::sparselib>();
        } else {
            // load_balance or automatical: these need executor parameters.
            auto rexec = result->get_executor();
            auto cuda_exec =
                std::dynamic_pointer_cast<const CudaExecutor>(rexec);
            auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
            auto dpcpp_exec =
                std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
            auto lb = dynamic_cast<load_balance*>(strat);
            if (cuda_exec) {
                if (lb) {
                    new_strat =
                        std::make_shared<typename CsrType::load_balance>(
                            cuda_exec);
                } else {
                    new_strat = std::make_shared<typename CsrType::automatical>(
                        cuda_exec);
                }
            } else if (hip_exec) {
                if (lb) {
                    new_strat =
                        std::make_shared<typename CsrType::load_balance>(
                            hip_exec);
                } else {
                    new_strat = std::make_shared<typename CsrType::automatical>(
                        hip_exec);
                }
            } else if (dpcpp_exec) {
                if (lb) {
                    new_strat =
                        std::make_shared<typename CsrType::load_balance>(
                            dpcpp_exec);
                } else {
                    new_strat = std::make_shared<typename CsrType::automatical>(
                        dpcpp_exec);
                }
            } else {
                // Try to preserve this executor's configuration
                auto this_cuda_exec =
                    std::dynamic_pointer_cast<const CudaExecutor>(
                        this->get_executor());
                auto this_hip_exec =
                    std::dynamic_pointer_cast<const HipExecutor>(
                        this->get_executor());
                auto this_dpcpp_exec =
                    std::dynamic_pointer_cast<const DpcppExecutor>(
                        this->get_executor());
                if (this_cuda_exec) {
                    if (lb) {
                        new_strat =
                            std::make_shared<typename CsrType::load_balance>(
                                this_cuda_exec);
                    } else {
                        new_strat =
                            std::make_shared<typename CsrType::automatical>(
                                this_cuda_exec);
                    }
                } else if (this_hip_exec) {
                    if (lb) {
                        new_strat =
                            std::make_shared<typename CsrType::load_balance>(
                                this_hip_exec);
                    } else {
                        new_strat =
                            std::make_shared<typename CsrType::automatical>(
                                this_hip_exec);
                    }
                } else if (this_dpcpp_exec) {
                    if (lb) {
                        new_strat =
                            std::make_shared<typename CsrType::load_balance>(
                                this_dpcpp_exec);
                    } else {
                        new_strat =
                            std::make_shared<typename CsrType::automatical>(
                                this_dpcpp_exec);
                    }
                } else {
                    // FIXME: this changes strategies.
                    // We had a load balance or automatical strategy from a non
                    // HIP or Cuda executor and are moving to a non HIP or Cuda
                    // executor.
                    new_strat = std::make_shared<typename CsrType::classical>();
                }
            }
        }
        result->set_strategy(new_strat);
    }
1400
    /**
     * (Re)computes the srow array for the current strategy. Should be run
     * after any change to row_ptrs_.
     */
    void make_srow()
    {
        // The strategy sizes srow from the number of stored values (nnz),
        // then fills it from the row pointers.
        srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
        strategy_->process(row_ptrs_, &srow_);
    }
1409
    // Backend for scale(); alpha is 1x1 (the public wrapper asserts the
    // dimensions before calling this).
    virtual void scale_impl(const LinOp* alpha);

    // Backend for inv_scale(); alpha is 1x1 (the public wrapper asserts the
    // dimensions before calling this).
    virtual void inv_scale_impl(const LinOp* alpha);

private:
    std::shared_ptr<strategy_type> strategy_;
    array<value_type> values_;
    array<index_type> col_idxs_;
    array<index_type> row_ptrs_;
    // "Starting rows" auxiliary data used by the load_balance/automatical
    // strategies; maintained by make_srow().
    array<index_type> srow_;

    void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
};
1435
1436
1437namespace detail {
1438
1439
1446template <typename ValueType, typename IndexType>
1447void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1448{
1449 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1450 using automatical = typename Csr<ValueType, IndexType>::automatical;
1451 auto strategy = result->get_strategy();
1452 auto executor = result->get_executor();
1453 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1454 if (auto exec =
1455 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1456 result->set_strategy(std::make_shared<load_balance>(exec));
1457 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1458 executor)) {
1459 result->set_strategy(std::make_shared<load_balance>(exec));
1460 }
1461 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1462 if (auto exec =
1463 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1464 result->set_strategy(std::make_shared<automatical>(exec));
1465 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1466 executor)) {
1467 result->set_strategy(std::make_shared<automatical>(exec));
1468 }
1469 }
1470}
1471
1472
1473} // namespace detail
1474} // namespace matrix
1475} // namespace gko
1476
1477
1478#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition polymorphic_object.hpp:479
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The diagonal of a LinOp implementing this interface can be extracted.
Definition lin_op.hpp:743
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:668
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:243
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition array.hpp:166
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:687
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:703
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:696
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:670
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition coo.hpp:65
Definition csr.hpp:49
Definition csr.hpp:521
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:686
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:594
automatical()
Creates an automatical strategy.
Definition csr.hpp:549
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:674
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:559
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:605
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition csr.hpp:579
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:568
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:232
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:239
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:268
classical()
Creates a classical strategy.
Definition csr.hpp:237
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:261
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:307
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:318
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:320
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:312
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:314
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:353
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:416
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:508
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:380
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:361
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:469
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:406
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:371
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:391
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:282
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:293
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:295
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:287
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:289
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:331
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:342
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:338
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:336
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:344
strategy_type is to decide how to set the csr algorithm.
Definition csr.hpp:175
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:193
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:184
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:126
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1026
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:1045
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:1084
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:1109
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:1036
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of stored srow elements (the number of involved warps).
Definition csr.hpp:1055
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1017
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:976
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:1065
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:1074
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:1007
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:1096
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:967
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:998
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:120
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:56
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:66
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:116
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:57
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:58
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
The matrix namespace.
Definition dense_cache.hpp:24
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
Definition permutation.hpp:53
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:264
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:283
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:295
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:614
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:90
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:750
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the previous type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:473
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
typename detail::find_precision_impl< T, step >::type next_precision
Obtains the next type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:466
STL namespace.
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
permuting_reuse_info()
Creates an empty reuse info.
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
permuting_reuse_info(std::unique_ptr< Permutation< index_type > > value_permutation)
Creates a reuse info structure from its value permutation.
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46