SIRIUS 7.5.0
Electronic structure library and applications
anderson_mixer.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Simon Frasch, Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file anderson_mixer.hpp
21 *
22 * \brief Contains definition and implementation sirius::Anderson.
23 */
24
25#ifndef __ANDERSON_MIXER_HPP__
26#define __ANDERSON_MIXER_HPP__
27
28#include <tuple>
29#include <functional>
30#include <utility>
31#include <vector>
32#include <limits>
33#include <memory>
34#include <exception>
35#include <cmath>
36#include <numeric>
37
38#include "SDDK/memory.hpp"
39#include "mixer/mixer.hpp"
40#include "core/la/linalg.hpp"
41
42namespace sirius {
43namespace mixer {
44
45/// Anderson mixer.
46/**
47 * Quasi-Newton limited-memory method which updates \f$ x_{n+1} = x_n - G_nf_n \f$
48 * where \f$ G_n \f$ is an approximate inverse Jacobian. Anderson is derived
49 * by taking the low-rank update to the inverse Jacobian
50 *
51 * \f[
52 * G_{n+1} = (G_n + \Delta X_n - G_n \Delta F_n)(\Delta F_n^T \Delta F_n)^{-1}\Delta F_n^T
53 * \f]
54 *
55 * such that the secant equations \f$ G_{n+1} \Delta F_n = \Delta X_n \f$ are satisfied for previous
56 * iterations. Then \f$ G_n \f$ is taken \f$ -\beta I \f$. The Anderson class explicitly constructs
57 * the Gram matrix \f$ \Delta F_n^T \Delta F_n \f$ to solve the least-squares problem. For more stability
58 * use Anderson_stable, which comes at the cost of orthogonalizing \f$ \Delta F_n \f$.
59 *
60 * Reference paper: Fang, Haw‐ren, and Yousef Saad. "Two classes of multisecant
61 * methods for nonlinear acceleration." Numerical Linear Algebra with Applications
62 * 16.3 (2009): 197-221.
63 */
64template <typename... FUNCS>
65class Anderson : public Mixer<FUNCS...>
66{
67 private:
68 double beta_;
69 double beta0_;
70 double beta_scaling_factor_;
72 sddk::mdarray<double, 2> S_factorized_;
73 std::size_t history_size_;
74 public:
75 Anderson(std::size_t max_history, double beta, double beta0, double beta_scaling_factor)
76 : Mixer<FUNCS...>(max_history)
77 , beta_(beta)
78 , beta0_(beta0)
79 , beta_scaling_factor_(beta_scaling_factor)
80 , S_(max_history - 1, max_history - 1)
81 , S_factorized_(max_history - 1, max_history - 1)
82 , history_size_(0)
83 {
84 }
85
86 void mix_impl() override
87 {
88 const auto idx_step = this->idx_hist(this->step_);
89 const auto idx_next_step = this->idx_hist(this->step_ + 1);
90 const auto idx_prev_step = this->idx_hist(this->step_ - 1);
91
92 const auto history_size = static_cast<int>(this->history_size_);
93
94 const bool normalize = false;
95
96 // beta scaling
97 if (this->step_ > this->max_history_) {
98 const double rmse_avg = std::accumulate(this->rmse_history_.begin(), this->rmse_history_.end(), 0.0) /
99 this->rmse_history_.size();
100 if (this->rmse_history_[idx_step] > rmse_avg) {
101 this->beta_ = std::max(beta0_, this->beta_ * beta_scaling_factor_);
102 }
103 }
104
105 // Set up the next x_{n+1} = x_n.
106 // Can't use this->output_history_[idx_step + 1] directly here,
107 // as it's still used when history is full.
108 this->copy(this->output_history_[idx_step], this->input_);
109
110 // + beta * f_n
111 this->axpy(this->beta_, this->residual_history_[idx_step], this->input_);
112
113 if (history_size > 0) {
114 // Compute the difference residual[step] - residual[step - 1]
115 // and store it in residual[step - 1], but don't destroy
116 // residual[step]
117 this->scale(-1.0, this->residual_history_[idx_prev_step]);
118 this->axpy(1.0, this->residual_history_[idx_step], this->residual_history_[idx_prev_step]);
119
120 // Do the same for difference x
121 this->scale(-1.0, this->output_history_[idx_prev_step]);
122 this->axpy(1.0, this->output_history_[idx_step], this->output_history_[idx_prev_step]);
123
124 // Compute the new Gram matrix for the least-squares problem
125 for (int i = 0; i <= history_size - 1; ++i) {
126 auto j = this->idx_hist(this->step_ - i - 1);
127 this->S_(history_size - 1, history_size - i - 1) = this->S_(history_size - i - 1, history_size - 1) = this->template inner_product<normalize>(
128 this->residual_history_[j],
129 this->residual_history_[idx_prev_step]
130 );
131 }
132
133 // Make a copy because factorizing destroys the matrix.
134 for (int i = 0; i < history_size; ++i)
135 for (int j = 0; j < history_size; ++j)
136 this->S_factorized_(j, i) = this->S_(j, i);
137
138 sddk::mdarray<double, 1> h(history_size);
139 for (int i = 1; i <= history_size; ++i) {
140 auto j = this->idx_hist(this->step_ - i);
141 h(history_size - i) = this->template inner_product<normalize>(
142 this->residual_history_[j],
143 this->residual_history_[idx_step]
144 );
145 }
146
147 bool invertible = la::wrap(la::lib_t::lapack).sysolve(history_size, this->S_factorized_, h);
148
149 if (invertible) {
150 // - beta * (delta F) * h
151 for (int i = 1; i <= history_size; ++i) {
152 auto j = this->idx_hist(this->step_ - i);
153 this->axpy(-this->beta_ * h(history_size - i), this->residual_history_[j], this->input_);
154 }
155
156 // - (delta X) * h
157 for (int i = 1; i <= history_size; ++i) {
158 auto j = this->idx_hist(this->step_ - i);
159 this->axpy(-h(history_size - i), this->output_history_[j], this->input_);
160 }
161 } else {
162 this->history_size_ = 0;
163 }
164 }
165
166 // In case history is full, set S_[1:end-1,1:end-1] .= S_[2:end,2:end]
167 if (this->history_size_ == this->max_history_ - 1) {
168 for (int col = 0; col <= history_size - 2; ++col) {
169 for (int row = 0; row <= history_size - 2; ++row) {
170 this->S_(row, col) = this->S_(row + 1, col + 1);
171 }
172 }
173 }
174
175 this->copy(this->input_, this->output_history_[idx_next_step]);
176 this->history_size_ = std::min(this->history_size_ + 1, this->max_history_ - 1);
177 }
178};
179} // namespace mixer
180} // namespace sirius
181
182#endif // __MIXER_HPP__
Abstract mixer for variadic number of Function objects, which are described by FunctionProperties.
Definition: mixer.hpp:279
Linear algebra interface.
Memory management functions and classes.
Contains definition and implementation of sirius::Mixer base class.
@ lapack
CPU LAPACK.
Namespace of the SIRIUS library.
Definition: sirius.f90:5