template <typename T>
U_operator<T>::U_operator(Simulation_context const& ctx__, Hubbard_matrix const& um1__, std::array<double, 3> vk__)
    : ctx_(ctx__)
    , vk_(vk__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
    /* r.first is the total number of Hubbard wave-functions */
    auto r = ctx_.unit_cell().num_hubbard_wf();
    this->nhwf_            = r.first;
    this->offset_          = um1__.offset();
    this->atomic_orbitals_ = um1__.atomic_orbitals();
    for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
        um_[j] = la::dmatrix<std::complex<T>>(r.first, r.first);
    }

    /* copy the on-site (local) blocks of the Hubbard matrix */
    for (int at_lvl = 0; at_lvl < static_cast<int>(um1__.atomic_orbitals().size()); at_lvl++) {
        const int ia    = um1__.atomic_orbitals(at_lvl).first;
        auto& atom_type = ctx_.unit_cell().atom(ia).type();
        int lo_ind      = um1__.atomic_orbitals(at_lvl).second;
        if (atom_type.lo_descriptor_hub(lo_ind).use_for_calculation()) {
            int lmmax_at = 2 * atom_type.lo_descriptor_hub(lo_ind).l() + 1;
            for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
                for (int m2 = 0; m2 < lmmax_at; m2++) {
                    for (int m1 = 0; m1 < lmmax_at; m1++) {
                        um_[j](um1__.offset(at_lvl) + m1, um1__.offset(at_lvl) + m2) =
                            um1__.local(at_lvl)(m1, m2, j);
                    }
                }
            }
        }
    }
    /* add the inter-site (nonlocal) blocks with the Bloch phase factor exp(i 2*pi k.T) */
    for (int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
        auto nl = ctx_.cfg().hubbard().nonlocal(i);
        int ia  = nl.atom_pair()[0];
        int ja  = nl.atom_pair()[1];
        /* orbital momenta il, jl and the lattice translation Tr of the pair are read from nl;
           these lines are elided in the listing and reconstructed from their use below */
        int il  = nl.l()[0];
        int jl  = nl.l()[1];
        auto Tr = nl.T();

        int at1_lvl = um1__.find_orbital_index(ia, nl.n()[0], il);
        int at2_lvl = um1__.find_orbital_index(ja, nl.n()[1], jl);

        auto z1 = std::exp(std::complex<double>(0, twopi * dot(vk_, r3::vector<int>(Tr))));
        for (int is = 0; is < ctx_.num_spins(); is++) {
            for (int m1 = 0; m1 < 2 * il + 1; m1++) {
                for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
                    um_[is](um1__.offset(at1_lvl) + m1, um1__.offset(at2_lvl) + m2) +=
                        z1 * um1__.nonlocal(i)(m1, m2, is);
                }
            }
        }
    }
    for (int is = 0; is < ctx_.num_spins(); is++) {
        auto diff = check_hermitian(um_[is], r.first);
        if (diff > 1e-9) { /* tolerance elided in the listing; 1e-9 is a placeholder */
            RTE_THROW("um is not Hermitian");
        }
        if (env::print_checksum()) {
            print_checksum("um" + std::to_string(is), um_[is].checksum(r.first, r.first), RTE_OUT(ctx_.out()));
        }
        if (ctx_.processing_unit() == sddk::device_t::GPU) {
            um_[is].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
        }
    }
}
template <typename T>
int
U_operator<T>::find_orbital_index(const int ia__, const int n__, const int l__) const
{
    int at_lvl = 0;
    for (at_lvl = 0; at_lvl < static_cast<int>(atomic_orbitals_.size()); at_lvl++) {
        int lo_ind  = atomic_orbitals_[at_lvl].second;
        int atom_id = atomic_orbitals_[at_lvl].first;
        if ((atomic_orbitals_[at_lvl].first == ia__) &&
            (ctx_.unit_cell().atom(atom_id).type().lo_descriptor_hub(lo_ind).n() == n__) &&
            (ctx_.unit_cell().atom(atom_id).type().lo_descriptor_hub(lo_ind).l() == l__)) {
            break;
        }
    }
    if (at_lvl == static_cast<int>(atomic_orbitals_.size())) {
        std::cout << "atom: " << ia__ << ", n: " << n__ << ", l: " << l__ << std::endl;
        RTE_THROW("Found an orbital that is not listed\n");
    }
    return at_lvl;
}
template class U_operator<double>;
#if defined(SIRIUS_USE_FP32)
template class U_operator<float>;
#endif
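As a side note, the inter-site blocks above are scaled by the Bloch phase factor exp(i 2*pi k.T). A minimal standalone sketch of that factor (the names k and T below are illustrative and only mirror vk_ and Tr from the constructor):

#include <array>
#include <cmath>
#include <complex>
#include <cstdio>

int main()
{
    const double twopi = 2.0 * M_PI;
    std::array<double, 3> k{0.25, 0.0, 0.0}; /* fractional k-point coordinates (example values) */
    std::array<int, 3> T{1, 0, 0};           /* lattice translation connecting the atom pair */

    double kT = k[0] * T[0] + k[1] * T[1] + k[2] * T[2];
    /* phase attached to the nonlocal (inter-site) Hubbard block */
    auto z1 = std::exp(std::complex<double>(0, twopi * kT));
    std::printf("z1 = (%f, %f)\n", z1.real(), z1.imag());
    return 0;
}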
template <typename T>
void
apply_U_operator(Simulation_context& ctx__, wf::spin_range spins__, wf::band_range br__,
                 wf::Wave_functions<T> const& hub_wf__, wf::Wave_functions<T> const& phi__,
                 U_operator<T> const& um__, wf::Wave_functions<T>& hphi__)
{
    if (!ctx__.hubbard_correction()) {
        return;
    }
    /* ... (elided: allocation of the inner-product matrix dm = <hub_wf|phi> and of the work matrix Up) ... */

    /* linear-algebra backend and memory type; the GPU branch is elided in the listing
       and reconstructed here from the usage below */
    auto la = la::lib_t::blas;
    auto mt = sddk::memory_t::host;
    if (ctx__.processing_unit() == sddk::device_t::GPU) {
        la = la::lib_t::gpublas;
        mt = sddk::memory_t::device;
    }
    /* ... */

    /* accumulate Up = U * dm block by block */
    #pragma omp parallel for schedule(static)
    for (int at_lvl = 0; at_lvl < (int)um__.atomic_orbitals().size(); at_lvl++) {
        const int ia     = um__.atomic_orbitals(at_lvl).first;
        auto const& atom = ctx__.unit_cell().atom(ia);
        if (atom.type().lo_descriptor_hub(um__.atomic_orbitals(at_lvl).second).use_for_calculation()) {
            const int lmax_at = 2 * atom.type().lo_descriptor_hub(um__.atomic_orbitals(at_lvl).second).l() + 1;
            for (int s1 = 0; s1 < ctx__.num_spins(); s1++) {
                for (int s2 = 0; s2 < ctx__.num_spins(); s2++) {
                    for (int nbd = 0; nbd < br__.size(); nbd++) {
                        for (int m1 = 0; m1 < lmax_at; m1++) {
                            for (int m2 = 0; m2 < lmax_at; m2++) {
                                /* packed spin-block index: 0, 1 for the diagonal blocks, 2, 3 for the off-diagonal ones */
                                const int ind = (s1 == s2) * s1 + (1 + 2 * s2 + s1) * (s1 != s2);
                                Up(um__.nhwf() * s1 + um__.offset(at_lvl) + m1, nbd) +=
                                    um__(um__.offset(at_lvl) + m2, um__.offset(at_lvl) + m1, ind) *
                                    dm(um__.nhwf() * s2 + um__.offset(at_lvl) + m2, nbd);
                            }
                        }
                    }
                }
            }
        }
    }
    /* ... (elided: in another branch the same contraction is done with a single gemm call taking, among others,
       um__.at(mt, 0, 0, spins__.begin().get()), um__.nhwf(), dm.at(mt, 0, 0), dm.ld(), ...) ... */
    Up.copy_to(sddk::memory_t::host);
    /* ... */
    for (auto s = spins__.begin(); s != spins__.end(); s++) {
        /* spin indices sp and sp1 are elided in the listing; reconstructed from their use below */
        auto sp  = hub_wf__.actual_spin_index(s);
        auto sp1 = hphi__.actual_spin_index(s);
        /* hphi += |hub_wf> Up */
        wf::transform(ctx__.spla_context(), mt, Up, 0, 0, 1.0, hub_wf__, sp, wf::band_range(0, hub_wf__.num_wf().get()),
                      1.0, hphi__, sp1, br__);
    }
}
template void apply_U_operator<double>(Simulation_context&, wf::spin_range, wf::band_range,
                                       const wf::Wave_functions<double>&, const wf::Wave_functions<double>&,
                                       U_operator<double> const&, wf::Wave_functions<double>&);

#ifdef SIRIUS_USE_FP32
template void apply_U_operator<float>(Simulation_context&, wf::spin_range, wf::band_range,
                                      const wf::Wave_functions<float>&, const wf::Wave_functions<float>&,
                                      U_operator<float> const&, wf::Wave_functions<float>&);
#endif
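The spin-block index used in apply_U_operator, ind = (s1 == s2) * s1 + (1 + 2 * s2 + s1) * (s1 != s2), packs the four spin blocks of the Hubbard matrix into indices 0..3, with the two diagonal blocks first. A tiny standalone check of that mapping (not part of the listing):

#include <cstdio>

int main()
{
    /* enumerate the mapping of (s1, s2) spin pairs to the packed block index */
    for (int s1 = 0; s1 < 2; s1++) {
        for (int s2 = 0; s2 < 2; s2++) {
            int ind = (s1 == s2) * s1 + (1 + 2 * s2 + s1) * (s1 != s2);
            std::printf("s1=%d s2=%d -> block %d\n", s1, s2, ind);
        }
    }
    return 0;
}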
void
apply_S_operator_strain_deriv(sddk::memory_t mem__, int comp__, Beta_projector_generator<double>& bp__,
                              beta_projectors_coeffs_t<double>& bp_coeffs__,
                              Beta_projector_generator<double>& bp_strain_deriv__,
                              beta_projectors_coeffs_t<double>& bp_strain_deriv_coeffs__,
                              wf::Wave_functions<double>& phi__, Q_operator<double>& q_op__,
                              wf::Wave_functions<double>& ds_phi__)
{
    if (sddk::is_device_memory(mem__)) {
        RTE_ASSERT((bp__.device_t() == sddk::device_t::GPU));
    }
    /* ... */
    using complex_t = std::complex<double>;
    /* ... (elided: host memory type host_mem and the band ranges band_range_phi / band_range over phi__ and ds_phi__) ... */
    for (int ichunk = 0; ichunk < bp__.num_chunks(); ichunk++) {
        /* generate the beta-projectors and their strain derivatives for the current chunk */
        bp__.generate(bp_coeffs__, ichunk);
        bp_strain_deriv__.generate(bp_strain_deriv_coeffs__, ichunk, comp__);

        auto& spla_ctx        = bp__.ctx().spla_context();
        bool result_on_device = bp__.ctx().processing_unit() == sddk::device_t::GPU;
        /* <d beta / d epsilon_comp | phi> and <beta | phi> */
        auto dbeta_phi = inner_prod_beta<complex_t>(spla_ctx, mem__, host_mem, result_on_device,
                                                    bp_strain_deriv_coeffs__, phi__, wf::spin_index(0), band_range_phi);
        auto beta_phi  = inner_prod_beta<complex_t>(spla_ctx, mem__, host_mem, result_on_device,
                                                    bp_coeffs__, phi__, wf::spin_index(0), band_range_phi);
        /* product rule: ds_phi += |beta> Q <d beta|phi> + |d beta> Q <beta|phi> */
        q_op__.apply(mem__, ichunk, 0, ds_phi__, band_range, bp_coeffs__, dbeta_phi);
        q_op__.apply(mem__, ichunk, 0, ds_phi__, band_range, bp_strain_deriv_coeffs__, beta_phi);
    }
}
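The two q_op__.apply calls above are the two terms of the product rule for the augmentation part of the S-operator, S = 1 + sum_ij |beta_i> Q_ij <beta_j|. A toy sketch of that accumulation pattern with plain std::complex arrays (all names and sizes are illustrative, not the library API):

#include <complex>
#include <cstdio>
#include <vector>

using cx = std::complex<double>;

/* accumulate  ds_phi += beta * Q * (dbeta^H * phi) + dbeta * Q * (beta^H * phi)
   for one "chunk" with nbeta projectors and a single band; ng is the number of
   plane-wave coefficients; beta/dbeta are stored projector-major, Q column-major */
void add_strain_deriv(int ng, int nbeta,
                      std::vector<cx> const& beta, std::vector<cx> const& dbeta,
                      std::vector<cx> const& Q, std::vector<cx> const& phi,
                      std::vector<cx>& ds_phi)
{
    std::vector<cx> beta_phi(nbeta, 0.0), dbeta_phi(nbeta, 0.0);
    for (int i = 0; i < nbeta; i++) {
        for (int g = 0; g < ng; g++) {
            beta_phi[i]  += std::conj(beta[i * ng + g]) * phi[g];
            dbeta_phi[i] += std::conj(dbeta[i * ng + g]) * phi[g];
        }
    }
    for (int i = 0; i < nbeta; i++) {
        for (int j = 0; j < nbeta; j++) {
            for (int g = 0; g < ng; g++) {
                ds_phi[g] += beta[i * ng + g]  * Q[i + j * nbeta] * dbeta_phi[j];
                ds_phi[g] += dbeta[i * ng + g] * Q[i + j * nbeta] * beta_phi[j];
            }
        }
    }
}

int main()
{
    int ng = 2, nbeta = 1;
    std::vector<cx> beta  = {{1.0, 0.0}, {0.0, 0.0}};
    std::vector<cx> dbeta = {{0.1, 0.0}, {0.2, 0.0}};
    std::vector<cx> Q     = {{0.5, 0.0}};
    std::vector<cx> phi   = {{1.0, 0.0}, {1.0, 0.0}};
    std::vector<cx> ds_phi(ng, 0.0);
    add_strain_deriv(ng, nbeta, beta, dbeta, Q, phi, ds_phi);
    std::printf("ds_phi = (%f, %f), (%f, %f)\n", ds_phi[0].real(), ds_phi[0].imag(),
                ds_phi[1].real(), ds_phi[1].imag());
    return 0;
}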