SIRIUS 7.5.0
Electronic structure library and applications
dmatrix.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2022 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file dmatrix.hpp
21 *
22 * \brief Contains definition and implementation of distributed matrix class.
23 */
24
25#ifndef __DMATRIX_HPP__
26#define __DMATRIX_HPP__
27
28#include <iomanip>
29#include <spla/spla.hpp>
30#include <costa/layout.hpp>
31#include <costa/grid2grid/transformer.hpp>
33#include "core/splindex.hpp"
34#include "core/hdf5_tree.hpp"
35#include "core/typedefs.hpp"
36#include "core/rte/rte.hpp"
37#include "core/json.hpp"
38#include "SDDK/memory.hpp"
39
40namespace sirius {
41
42namespace la {
43
namespace fmt {
/// Stream a complex number in the human-readable form "re + I*im".
template <typename T>
std::ostream& operator<<(std::ostream& out, std::complex<T> z)
{
    return out << z.real() << " + I*" << z.imag();
}
}
52
53/// Distributed matrix.
54template <typename T>
55class dmatrix: public sddk::matrix<T>
56{
57 private:
58 /// Global number of matrix rows.
59 int num_rows_{0};
60
61 /// Global number of matrix columns.
62 int num_cols_{0};
63
64 /// Row block size.
65 int bs_row_{0};
66
67 /// Column block size.
68 int bs_col_{0};
69
70 /// BLACS grid.
71 BLACS_grid const* blacs_grid_{nullptr};
72
73 /// Split index of matrix rows.
75
76 /// Split index of matrix columns.
78
79 /// ScaLAPACK matrix descriptor.
80 ftn_int descriptor_[9];
81
82 /// Matrix distribution used for SPLA library functions
83 spla::MatrixDistribution spla_dist_{spla::MatrixDistribution::create_mirror(MPI_COMM_SELF)};
84
85 costa::grid_layout<T> grid_layout_;
86
87 void init()
88 {
89 if (blacs_grid_ != nullptr) {
90#ifdef SIRIUS_SCALAPACK
91 linalg_base::descinit(descriptor_, num_rows_, num_cols_, bs_row_, bs_col_, 0, 0, blacs_grid_->context(),
93#endif
94 grid_layout_ = costa::block_cyclic_layout<T>(this->num_rows(), this->num_cols(), this->bs_row(),
95 this->bs_col(), 1, 1, this->num_rows(), this->num_cols(), this->blacs_grid().num_ranks_row(),
96 this->blacs_grid().num_ranks_col(), 'R', 0, 0, this->at(sddk::memory_t::host), this->ld(), 'C',
97 this->blacs_grid().comm().rank());
98 }
99 }
100
101 /* forbid copy constructor */
102 dmatrix(dmatrix<T> const& src) = delete;
103 /* forbid assignment operator */
104 dmatrix<T>& operator=(dmatrix<T> const& src) = delete;
105
106 public:
107 // Default constructor
108 dmatrix()
109 {
110 }
111
112 dmatrix(int num_rows__, int num_cols__, BLACS_grid const& blacs_grid__, int bs_row__, int bs_col__,
113 sddk::memory_t mem_type__ = sddk::memory_t::host);
114
115 dmatrix(int num_rows__, int num_cols__, sddk::memory_t mem_type__ = sddk::memory_t::host);
116
117 dmatrix(int num_rows__, int num_cols__, sddk::memory_pool& mp__, std::string const& label__ = "");
118
119 dmatrix(T* ptr__, int num_rows__, int num_cols__, BLACS_grid const& blacs_grid__, int bs_row__, int bs_col__);
120
121 dmatrix(int num_rows__, int num_cols__, BLACS_grid const& blacs_grid__, int bs_row__, int bs_col__,
122 sddk::memory_pool& mp__);
123
124 dmatrix(T* ptr__, int num_rows__, int num_cols__);
125
126 dmatrix(dmatrix<T>&& src) = default;
127
128 dmatrix<T>& operator=(dmatrix<T>&& src) = default;
129
130 /// Return size of the square matrix or -1 in case of rectangular matrix.
131 inline int size() const
132 {
133 if (num_rows_ == num_cols_) {
134 return num_rows_;
135 }
136 return -1;
137 }
138
139 inline int size_local() const
140 {
141 return this->num_rows_local() * this->num_cols_local();
142 }
143
144 /// Return number of rows in the global matrix.
145 inline int num_rows() const
146 {
147 return num_rows_;
148 }
149
150 /// Return local number of rows for this MPI rank.
151 inline int num_rows_local() const
152 {
153 return spl_row_.local_size();
154 }
155
156 /// Return local number of rows for a given MPI rank.
157 inline int num_rows_local(int rank) const
158 {
159 return spl_row_.local_size(block_id(rank));
160 }
161
162 /// Return global row index in the range [0, num_rows) by the local index in the range [0, num_rows_local).
163 inline int irow(int irow_loc) const
164 {
165 return spl_row_.global_index(irow_loc);
166 }
167
168 inline int num_cols() const
169 {
170 return num_cols_;
171 }
172
173 /// Local number of columns.
174 inline int num_cols_local() const
175 {
176 return spl_col_.local_size();
177 }
178
179 inline int num_cols_local(int rank) const
180 {
181 return spl_col_.local_size(block_id(rank));
182 }
183
184 /// Inindex of column in global matrix.
185 inline int icol(int icol_loc) const
186 {
187 return spl_col_.global_index(icol_loc);
188 }
189
190 inline int const* descriptor() const
191 {
192 return descriptor_;
193 }
194
195 inline spla::MatrixDistribution& spla_distribution()
196 {
197 return spla_dist_;
198 }
199
200 //void zero(int ir0__, int ic0__, int nr__, int nc__)
201 //{
202 // splindex<splindex_t::block_cyclic> spl_r0(ir0__, blacs_grid().num_ranks_row(), blacs_grid().rank_row(), bs_row_);
203 // splindex<splindex_t::block_cyclic> spl_r1(ir0__ + nr__, blacs_grid().num_ranks_row(), blacs_grid().rank_row(), bs_row_);
204
205 // splindex<splindex_t::block_cyclic> spl_c0(ic0__, blacs_grid().num_ranks_col(), blacs_grid().rank_col(), bs_col_);
206 // splindex<splindex_t::block_cyclic> spl_c1(ic0__ + nc__, blacs_grid().num_ranks_col(), blacs_grid().rank_col(), bs_col_);
207
208 // int m0 = spl_r0.local_size();
209 // int m1 = spl_r1.local_size();
210 // int n0 = spl_c0.local_size();
211 // int n1 = spl_c1.local_size();
212 // for (int j = n0; j < n1; j++) {
213 // std::fill(this->template at<device_t::CPU>(m0, j), this->template at<CPU>(m1, j), 0);
214 // }
215
216 // if (this->on_device()) {
217 // acc::zero(this->template at<device_t::GPU>(m0, n0), this->ld(), m1 - m0, n1 - n0);
218 // }
219 //}
220
222
223 void copy_to(sddk::memory_t mem__, int ir0__, int ic0__, int nr__, int nc__)
224 {
225 int m0, m1, n0, n1;
226 if (blacs_grid_ != nullptr) {
227 splindex_block_cyclic<> spl_r0(ir0__, n_blocks(blacs_grid().num_ranks_row()),
228 block_id(blacs_grid().rank_row()), bs_row_);
229 splindex_block_cyclic<> spl_r1(ir0__ + nr__, n_blocks(blacs_grid().num_ranks_row()),
230 block_id(blacs_grid().rank_row()), bs_row_);
231
232 splindex_block_cyclic<> spl_c0(ic0__, n_blocks(blacs_grid().num_ranks_col()),
233 block_id(blacs_grid().rank_col()), bs_col_);
234 splindex_block_cyclic<> spl_c1(ic0__ + nc__, n_blocks(blacs_grid().num_ranks_col()),
235 block_id(blacs_grid().rank_col()), bs_col_);
236
237 m0 = spl_r0.local_size();
238 m1 = spl_r1.local_size();
239 n0 = spl_c0.local_size();
240 n1 = spl_c1.local_size();
241 } else {
242 m0 = ir0__;
243 m1 = ir0__ + nr__;
244 n0 = ic0__;
245 n1 = ic0__ + nc__;
246 }
247
248 if (is_host_memory(mem__)) {
249 acc::copyout(this->at(sddk::memory_t::host, m0, n0), this->ld(),
250 this->at(sddk::memory_t::device, m0, n0), this->ld(), m1 - m0, n1 - n0);
251 }
252 if (is_device_memory(mem__)) {
253 acc::copyin(this->at(sddk::memory_t::device, m0, n0), this->ld(),
254 this->at(sddk::memory_t::host, m0, n0), this->ld(), m1 - m0, n1 - n0);
255 }
256 }
257
258 void set(int ir0__, int jc0__, int mr__, int nc__, T* ptr__, int ld__);
259
260 void set(const int irow_glob, const int icol_glob, T val);
261
262 void add(const int irow_glob, const int icol_glob, T val);
263
264 void add(real_type<T> beta__, const int irow_glob, const int icol_glob, T val);
265
266 void make_real_diag(int n__);
267
268 sddk::mdarray<T, 1> get_diag(int n__);
269
270 inline auto const& spl_col() const
271 {
272 return spl_col_;
273 }
274
275 inline auto const& spl_row() const
276 {
277 return spl_row_;
278 }
279
280 inline int rank_row() const
281 {
282 return blacs_grid_->rank_row();
283 }
284
285 inline int num_ranks_row() const
286 {
287 return blacs_grid_->num_ranks_row();
288 }
289
290 inline int rank_col() const
291 {
292 return blacs_grid_->rank_col();
293 }
294
295 inline int num_ranks_col() const
296 {
297 return blacs_grid_->num_ranks_col();
298 }
299
300 /// Row blocking factor
301 inline int bs_row() const
302 {
303 return bs_row_;
304 }
305
306 /// Column blocking factor
307 inline int bs_col() const
308 {
309 return bs_col_;
310 }
311
312 inline auto const& blacs_grid() const
313 {
314 RTE_ASSERT(blacs_grid_ != nullptr);
315 return *blacs_grid_;
316 }
317
318 void save_to_hdf5(std::string name__, int m__, int n__);
319
320 auto get_full_matrix() const
321 {
322 sddk::mdarray<T, 2> full_mtrx(num_rows(), num_cols());
323 full_mtrx.zero();
324
325 for (int j = 0; j < num_cols_local(); j++) {
326 for (int i = 0; i < num_rows_local(); i++) {
327 full_mtrx(irow(i), icol(j)) = (*this)(i, j);
328 }
329 }
330 if (blacs_grid_) {
331 blacs_grid_->comm().allreduce(full_mtrx.at(sddk::memory_t::host), static_cast<int>(full_mtrx.size()));
332 }
333 return full_mtrx;
334 }
335
336 nlohmann::json serialize_to_json(int m__, int n__) const
337 {
338 auto full_mtrx = get_full_matrix();
339
340 nlohmann::json dict;
341 dict["mtrx_re"] = nlohmann::json::array();
342 for (int i = 0; i < num_rows(); i++) {
343 dict["mtrx_re"].push_back(nlohmann::json::array());
344 for (int j = 0; j < num_cols(); j++) {
345 dict["mtrx_re"][i].push_back(std::real(full_mtrx(i, j)));
346 }
347 }
348 if (!std::is_scalar<T>::value) {
349 dict["mtrx_im"] = nlohmann::json::array();
350 for (int i = 0; i < num_rows(); i++) {
351 dict["mtrx_im"].push_back(nlohmann::json::array());
352 for (int j = 0; j < num_cols(); j++) {
353 dict["mtrx_im"][i].push_back(std::imag(full_mtrx(i, j)));
354 }
355 }
356 }
357 return dict;
358 }
359
360 std::stringstream serialize(std::string name__, int m__, int n__) const
361 {
362 auto full_mtrx = get_full_matrix();
363
364 std::stringstream out;
365 using namespace fmt;
366 out << std::setprecision(12) << std::setw(24) << std::fixed;
367
368 out << "matrix label : " << name__ << std::endl;
369 out << "{" << std::endl;
370 for (int i = 0; i < m__; i++) {
371 out << "{";
372 for (int j = 0; j < n__; j++) {
373 out << full_mtrx(i, j);
374 if (j != n__ - 1) {
375 out << ",";
376 }
377 }
378 if (i != n__ - 1) {
379 out << "}," << std::endl;
380 } else {
381 out << "}" << std::endl;
382 }
383 }
384 out << "}";
385
386 return out;
387 }
388
389 inline T checksum(int m__, int n__) const
390 {
391 T cs{0};
392
393 if (blacs_grid_ != nullptr) {
394 splindex_block_cyclic<> spl_row(m__, n_blocks(this->blacs_grid().num_ranks_row()),
395 block_id(this->blacs_grid().rank_row()), this->bs_row());
396 splindex_block_cyclic<> spl_col(n__, n_blocks(this->blacs_grid().num_ranks_col()),
397 block_id(this->blacs_grid().rank_col()), this->bs_col());
398 for (int i = 0; i < spl_col.local_size(); i++) {
399 for (int j = 0; j < spl_row.local_size(); j++) {
400 cs += (*this)(j, i);
401 }
402 }
403 this->blacs_grid().comm().allreduce(&cs, 1);
404 } else {
405 for (int i = 0; i < n__; i++) {
406 for (int j = 0; j < m__; j++) {
407 cs += (*this)(j, i);
408 }
409 }
410 }
411 return cs;
412 }
413
414 inline auto const& comm() const
415 {
416 if (blacs_grid_ != nullptr) {
417 return blacs_grid().comm();
418 } else {
419 return mpi::Communicator::self();
420 }
421 }
422
423 costa::grid_layout<T>& grid_layout()
424 {
425 return grid_layout_;
426 }
427
428 costa::grid_layout<T> grid_layout(int irow0__, int jcol0__, int mrow__, int ncol__)
429 {
430 return costa::block_cyclic_layout<T>(this->num_rows(), this->num_cols(), this->bs_row(),
431 this->bs_col(), irow0__ + 1, jcol0__ + 1, mrow__, ncol__, this->blacs_grid().num_ranks_row(),
432 this->blacs_grid().num_ranks_col(), 'R', 0, 0, this->at(sddk::memory_t::host), this->ld(), 'C',
433 this->blacs_grid().comm().rank());
434 }
435};
436
437} // namespace
438
439} // namespace sirius
440
441#endif // __DMATRIX_HPP__
Contains declaration and implementation of sddk::BLACS_grid class.
BLACS grid wrapper.
Definition: blacs_grid.hpp:43
Distributed matrix.
Definition: dmatrix.hpp:56
int icol(int icol_loc) const
Index of a column in the global matrix.
Definition: dmatrix.hpp:185
int num_cols_
Global number of matrix columns.
Definition: dmatrix.hpp:62
int num_rows_local() const
Return local number of rows for this MPI rank.
Definition: dmatrix.hpp:151
int num_rows_
Global number of matrix rows.
Definition: dmatrix.hpp:59
int size() const
Return size of the square matrix or -1 in case of rectangular matrix.
Definition: dmatrix.hpp:131
ftn_int descriptor_[9]
ScaLAPACK matrix descriptor.
Definition: dmatrix.hpp:80
spla::MatrixDistribution spla_dist_
Matrix distribution used for SPLA library functions.
Definition: dmatrix.hpp:83
int num_rows() const
Return number of rows in the global matrix.
Definition: dmatrix.hpp:145
sirius::splindex_block_cyclic spl_col_
Split index of matrix columns.
Definition: dmatrix.hpp:77
sirius::splindex_block_cyclic spl_row_
Split index of matrix rows.
Definition: dmatrix.hpp:74
int bs_row() const
Row blocking factor.
Definition: dmatrix.hpp:301
int num_cols_local() const
Local number of columns.
Definition: dmatrix.hpp:174
int bs_col() const
Column blocking factor.
Definition: dmatrix.hpp:307
int bs_row_
Row block size.
Definition: dmatrix.hpp:65
int bs_col_
Column block size.
Definition: dmatrix.hpp:68
BLACS_grid const * blacs_grid_
BLACS grid.
Definition: dmatrix.hpp:71
int num_rows_local(int rank) const
Return local number of rows for a given MPI rank.
Definition: dmatrix.hpp:157
int irow(int irow_loc) const
Return global row index in the range [0, num_rows) by the local index in the range [0,...
Definition: dmatrix.hpp:163
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
Index_t::global global_index(typename Index_t::local idxloc__, block_id block_id__) const
Return global index of an element by local index and block id.
Definition: splindex.hpp:428
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Definition: splindex.hpp:387
Contains definition and implementation of sirius::HDF5_tree class.
Interface to nlohmann::json library and helper functions.
Memory management functions and classes.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
Definition: memory.hpp:93
bool is_host_memory(memory_t mem__)
Check if this is a valid host memory (memory, accessible by the host).
Definition: memory.hpp:86
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
Definition: acc.hpp:367
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
Definition: acc.hpp:337
Namespace of the SIRIUS library.
Definition: sirius.f90:5
std::ostream & operator<<(std::ostream &out, hbar &&b)
Inject horizontal bar into ostream.
strong_type< int, struct __block_id_tag > block_id
ID of the block.
Definition: splindex.hpp:108
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
Definition: splindex.hpp:105
Error and warning handling during run-time execution.
Contains definition of sddk::splindex_base and specializations of sddk::splindex class.
Contains typedefs, enums and simple descriptors.