25#ifndef __NON_LOCAL_OPERATOR_BASE_HPP__
26#define __NON_LOCAL_OPERATOR_BASE_HPP__
43 int packed_mtrx_size_;
76 std::enable_if_t<std::is_same<std::complex<T>, F>::value,
void>
83 identity_t<F> alpha = F{1}, identity_t<F> beta = F{0})
const;
88 identity_t<F> alpha = F{1}, identity_t<F> beta = F{0})
const;
90 template <
typename F,
typename = std::enable_if_t<std::is_same<T, real_type<F>>::value>>
91 inline F value(
int xi1__,
int xi2__,
int ia__)
93 return this->value<F>(xi1__, xi2__, 0, ia__);
96 template <typename F, std::enable_if_t<std::is_same<T, F>::value,
bool> =
true>
97 F value(
int xi1__,
int xi2__,
int ispn__,
int ia__)
99 int nbf = this->ctx_.unit_cell().atom(ia__).mt_basis_size();
100 return this->
op_(0, packed_mtrx_offset_(ia__) + xi2__ * nbf + xi1__, ispn__);
103 template <
typename F, std::enable_if_t<std::is_same<std::complex<T>, F>::value,
bool> = true>
104 F value(
int xi1__,
int xi2__,
int ispn__,
int ia__)
106 int nbf = this->ctx_.unit_cell().atom(ia__).mt_basis_size();
107 return std::complex<T>(this->
op_(0, packed_mtrx_offset_(ia__) + xi2__ * nbf + xi1__, ispn__),
108 this->
op_(1, packed_mtrx_offset_(ia__) + xi2__ * nbf + xi1__, ispn__));
111 int size(
int i)
const;
113 inline bool is_diag()
const
118 template <
typename F>
119 sddk::matrix<F> get_matrix(
int ispn, sddk::memory_t mem)
const;
129 PROFILE(
"sirius::Non_local_operator::apply");
135 auto& beta_gk = beta_coeffs__.pw_coeffs_a_;
136 int num_gkvec_loc = beta_gk.size(0);
144 pu = sddk::device_t::GPU;
148 if (std::is_same<F, real_type<F>>::value) {
168 reinterpret_cast<F const*
>(op_.at(mem__, 0, packed_mtrx_offset_(ia), ispn_block__)),
169 nbf,
reinterpret_cast<F const*
>(beta_phi__.at(mem__, offs, 0)), beta_phi__.
ld(),
176 case sddk::device_t::GPU: {
182 case sddk::device_t::CPU: {
191 reinterpret_cast<F const*
>(beta_gk.at(mem__)), num_gkvec_loc * size_factor, work.at(mem__), nbeta,
193 reinterpret_cast<F*
>(op_phi__.
at(mem__, 0, sp,
wf::band_index(br__.begin()))),
194 op_phi__.
ld() * size_factor);
197 case sddk::device_t::GPU: {
201 case sddk::device_t::CPU: {
209std::enable_if_t<std::is_same<std::complex<T>, F>::value,
void>
218 auto& beta_gk = beta_coeffs__.pw_coeffs_a_;
219 int num_gkvec_loc = beta_gk.size(0);
233 pu = sddk::device_t::GPU;
239 reinterpret_cast<std::complex<T>*
>(op_.at(mem__, 0, packed_mtrx_offset_(ia), ispn_block__)), nbf,
241 work.at(mem__), nbf);
243 int jspn = ispn_block__ & 1;
246 beta_gk.at(mem__, 0, offs), num_gkvec_loc, work.at(mem__), nbf,
251 case sddk::device_t::CPU: {
254 case sddk::device_t::GPU: {
267 identity_t<F> alpha, identity_t<F> beta)
const
271 auto& uc = this->ctx_.unit_cell();
272 std::vector<int> offsets(uc.num_atoms() + 1, 0);
273 for (
int ia = 0; ia < uc.num_atoms(); ++ia) {
274 offsets[ia + 1] = offsets[ia] + uc.atom(ia).mt_basis_size();
278 RTE_ASSERT(out.
size(0) == B__.
size(0) &&
static_cast<int>(out.
size(1)) == this->size_);
279 RTE_ASSERT(
static_cast<int>(B__.
size(1)) == this->size_);
281 int num_atoms = uc.num_atoms();
288 for (
int ja = 0; ja < num_atoms; ++ja) {
289 int offset_ja = offsets[ja];
290 int size_ja = offsets[ja + 1] - offsets[ja];
292 const F* Bs = B__.at(mem_t, 0, offset_ja);
294 const F* Qs =
reinterpret_cast<const F*
>(op_.at(mem_t, 0, packed_mtrx_offset_(ja), ispn_block__));
295 F* C = out.at(mem_t, 0, offset_ja);
298 la::wrap(la).
gemm(
'N',
'N', B__.
size(0), size_ja, size_ja, &alpha, Bs, B__.
ld(), Qs, nbf, &beta, C, out.
ld());
306 identity_t<F> alpha, identity_t<F> beta)
const
310 auto& uc = this->ctx_.unit_cell();
311 std::vector<int> offsets(uc.num_atoms() + 1, 0);
312 for (
int ia = 0; ia < uc.num_atoms(); ++ia) {
313 offsets[ia + 1] = offsets[ia] + uc.atom(ia).mt_basis_size();
317 RTE_ASSERT(
static_cast<int>(out.
size(0)) == this->size_ && out.
size(1) == B__.
size(1));
318 RTE_ASSERT(
static_cast<int>(B__.
size(0)) == this->size_);
320 int num_atoms = uc.num_atoms();
327 for (
int ia = 0; ia < num_atoms; ++ia) {
328 int offset_ia = offsets[ia];
329 int size_ia = offsets[ia + 1] - offsets[ia];
330 const F* Bs = B__.at(mem_t, offset_ia, 0);
332 const F* Qs =
reinterpret_cast<const F*
>(op_.at(mem_t, 0, packed_mtrx_offset_(ia), ispn_block__));
333 F* C = out.at(mem_t, offset_ia, 0);
335 la::wrap(la).
gemm(
'N',
'N', size_ia, B__.
size(1), size_ia, &alpha, Qs, size_ia, Bs, B__.
ld(), &beta, C,
Contains declaration and implementation of sirius::Beta_projectors_base class.
Non-local part of the Hamiltonian and S-operator in the pseudopotential method.
bool is_diag_
True if the operator is diagonal in spin.
std::enable_if_t< std::is_same< std::complex< T >, F >::value, void > apply(sddk::memory_t mem__, int chunk__, atom_index_t::local ia__, int ispn_block__, wf::Wave_functions< T > &op_phi__, wf::band_range br__, beta_projectors_coeffs_t< T > const &beta_coeffs__, sddk::matrix< F > &beta_phi__)
Apply beta projectors from one atom in a chunk of beta projectors to all wave-functions.
void apply(sddk::memory_t mem__, int chunk__, int ispn_block__, wf::Wave_functions< T > &op_phi__, wf::band_range br__, beta_projectors_coeffs_t< T > const &beta_coeffs__, sddk::matrix< F > const &beta_phi__) const
Apply chunk of beta-projectors to all wave functions.
sddk::mdarray< T, 3 > op_
Non-local operator matrix.
void lmatmul(sddk::matrix< F > &out, sddk::matrix< F > const &B__, int ispn_block__, sddk::memory_t mem_t, identity_t< F > alpha=F{1}, identity_t< F > beta=F{0}) const
Computes out = α·(B·Q) + β·out, where Q is the packed non-local operator matrix.
void rmatmul(sddk::matrix< F > &out, sddk::matrix< F > const &B__, int ispn_block__, sddk::memory_t mem_t, identity_t< F > alpha=F{1}, identity_t< F > beta=F{0}) const
Computes out = α·(Q·B) + β·out, where Q is the packed non-local operator matrix.
Simulation context is a set of parameters and objects describing a single simulation.
Helper class to wrap stream id (integer number).
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
uint32_t ld() const
Return leading dimension size.
size_t size() const
Return total size (number of elements) of the array.
auto actual_spin_index(spin_index s__) const
Return the actual spin index of the wave-functions.
auto ld() const
Return leading dimensions of the wave-functions coefficients array.
std::complex< T > const * at(sddk::memory_t mem__, int xi__, atom_index_t::local ia__, spin_index s__, band_index b__) const
Return const pointer to the coefficient by atomic orbital index, atom, spin and band indices.
Wave-functions representation.
Describe a range of bands.
device_t
Type of the main processing unit.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (i.e. memory accessible by the device).
memory_t
Memory types where the code can store data.
int num_devices()
Get the number of devices.
void set_device_id(int id__)
Set the GPU id.
void sync_stream(stream_id sid__)
Synchronize a single stream.
void zero(T *ptr__, size_t n__)
Zero the device memory.
lib_t
Type of linear algebra backend library.
gpublas
GPU BLAS backend (cuBLAS or ROCblas)
int get_device_id(int num_devices__)
Get GPU device id associated with the current rank.
Namespace of the SIRIUS library.
Contains definition and implementation of Simulation_context class.
sddk::mdarray< int, 2 > desc_
Descriptor of block of beta-projectors for an atom.
int num_atoms_
Number of atoms in the current chunk.
int num_beta_
Number of beta-projectors in the current chunk.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.
Stores a chunk of the beta-projector and metadata.
beta_chunk_t beta_chunk_
Descriptor of the current beta chunk.
Helper functions for type traits.