SIRIUS 7.5.0
Electronic structure library and applications
broyden2_mixer.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Simon Frasch, Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file broyden2_mixer.hpp
21 *
22 * \brief Contains definition and implementation of sirius::Broyden2.
23 */
24
25#ifndef __BROYDEN2_MIXER_HPP__
26#define __BROYDEN2_MIXER_HPP__
27
28#include <tuple>
29#include <functional>
30#include <utility>
31#include <vector>
32#include <limits>
33#include <memory>
34#include <exception>
35#include <cmath>
36#include <numeric>
37
38#include "SDDK/memory.hpp"
39#include "mixer/mixer.hpp"
40
41namespace sirius {
42namespace mixer {
43
44/// Broyden mixer.
45/**
46 * Quasi-Newton, limited-memory method which updates \f$ x_{n+1} = x_n - G_nf_n \f$
47 * where \f$ G_n \f$ is an approximate inverse Jacobian. Broyden 2 is derived
48 * by recursively taking rank-1 updates to the inverse Jacobian.
49 * It requires the secant equation \f$ G_{n+1}\Delta f_n = \Delta x_n\f$ to hold for the next approximate Jacobian
50 * \f$ G_{n+1}\f$, leading to the update:
51 * \f[
52 * G_{n+1} := G_n + (\Delta x_n - G_n \Delta f_n)(\Delta f_n^*\Delta f_n)^{-1}\Delta f_n^*.
53 * \f]
54 * By induction we can show that \f$ G_n \f$ satisfies
55 * \f[
56 * G_n = G_1\prod_{i=1}^{n-1}\left(I - \frac{\Delta f_i \Delta f_i^*}{\Delta f_i^*\Delta f_i}\right)
57 * + \sum_{i=1}^{n-1}\frac{\Delta x_i \Delta f_i^*}{\Delta f_i^*\Delta f_i}\prod_{j=i+1}^{n-1}
58 * \left(I - \frac{\Delta f_j \Delta f_j^*}{\Delta f_j^*\Delta f_j}\right)
59 * \f]
60 * which shows orthogonalization with respect to the \f$ \Delta f_i \f$ vectors. A practical
61 * implementation can be derived from the identity
62 * \f[
63 * G_n = G_1 + \sum_{i=1}^{n-1}\left(\Delta x_i - G_1\Delta f_i\right)\gamma_{i,n}^*
64 * \f]
65 * where
66 * \f[
67 * \gamma_{i,n}^* = \frac{\Delta f_i^*}{\Delta f_i^*\Delta f_i}\prod_{j=i+1}^{n-1}
68 * \left(I - \frac{\Delta f_j\Delta f_j^*}{\Delta f_j^*\Delta f_j}\right).
69 * \f]
70 * The \f$ \gamma_{i,n}\f$ vector satisfies the following recursion relation:
71 * \f[
72 * \gamma_{i,n}^* = \frac{\Delta f_i^*}{\Delta f_i^*\Delta f_i}\left(I - \sum_{j=i+1}^{n-1}
73 * \Delta f_j \gamma_{j,n}^*\right)
74 * \f]
75 * When updating \f$ x_{n+1} = x_n - G_nf_n\f$ we only have to compute:
76 * \f[
77 * \alpha_i := \gamma_i^*f_n = \frac{1}{\Delta f_i^*\Delta f_i}\left[\Delta f_i^*f_n
78 * - \sum_{j=i+1}^{n-1}\alpha_j\Delta f_i^* \Delta f_j \right]
79 * \f]
80 * for \f$ i \f$ from \f$ n-1\f$ down to \f$ 1 \f$ so that
81 * \f[\begin{aligned}
82 * x_{n+1} &= x_n - G_nf_n \\
83 * &= x_n - G_1f_n + \sum_{i=1}^{n-1}\alpha_iG_1\Delta f_i
84 * - \sum_{i=1}^{n-1}\alpha_i\Delta x_i.
85 * \end{aligned}\f]
86 * In particular when \f$ G_1 = -\beta I\f$ we get the following update:
87 * \f[
88 * x_{n+1} = x_n + \beta f_n - \sum_{i=1}^{n-1}\alpha_i \beta \Delta f_i - \sum_{i=1}^{n-1}\alpha_i\Delta x_i.
89 * \f]
90 * Finally, we store the vectors \f$ f_1, \cdots, f_n \f$ and \f$ x_1, \dots, x_n \f$ and update the Gram-matrix
91 * \f$ S_{ij} = f_i^*f_j \f$ in every iteration. The \f$ \alpha_i \f$ coefficients can be easily computed from
92 * \f$ S \f$.
93 *
94 * \note
95 * This class does not do anything to improve the stability of the Gram-Schmidt procedure (clearly visible in the
96 * first explicit expression for \f$ G_n \f$) and can therefore be very unstable for larger history sizes.
97 */
98template <typename... FUNCS>
99class Broyden2 : public Mixer<FUNCS...>
100{
101 public:
102 Broyden2(std::size_t max_history, double beta, double beta0, double beta_scaling_factor, double linear_mix_rmse_tol)
103 : Mixer<FUNCS...>(max_history)
104 , beta_(beta)
105 , beta0_(beta0)
106 , beta_scaling_factor_(beta_scaling_factor)
107 , linear_mix_rmse_tol_(linear_mix_rmse_tol)
108 , S_(max_history, max_history)
109 , gamma_(max_history)
110 {
111 }
112
113 void mix_impl() override
114 {
115 const auto idx_step = this->idx_hist(this->step_);
116 const auto idx_next_step = this->idx_hist(this->step_ + 1);
117
118 const auto n = static_cast<int>(std::min(this->step_, this->max_history_ - 1));
119
120 const bool normalize = false;
121
122 for (int i = 0; i <= n; ++i) {
123 int j = this->idx_hist(this->step_ - i);
124 this->S_(n - i, n) = this->S_(n, n - i) = this->template inner_product<normalize>(
125 this->residual_history_[j],
126 this->residual_history_[idx_step]
127 );
128 }
129
130 // Expand (I - Δf₁Δf₁ᵀ/Δf₁ᵀΔf₁)...(I - Δfₙ₋₁Δfₙ₋₁ᵀ/Δfₙ₋₁ᵀΔfₙ₋₁)fₙ
131 // to γ₁Δf₁ + ... + γₙ₋₁Δfₙ₋₁ + fₙ
132 // the denominator ΔfᵢᵀΔfᵢ is constant, so apply only at the end
133 for (int i = 1; i <= n; ++i) {
134 // compute -Δfᵢᵀfₙ
135 // = -(fᵢ₊₁ᵀfₙ - fᵢᵀfₙ)
136 this->gamma_(n - i) = this->S_(n - i, n) - this->S_(n - i + 1, n);
137
138 for (int j = 1; j < i; ++j) {
139 // compute -ΔfᵢᵀΔfⱼ
140 // = -(fᵢ₊₁ - fᵢ)(fⱼ₊₁ - fⱼ)
141 // = -fᵢ₊₁ᵀfⱼ₊₁ + fᵢᵀfⱼ₊₁ + fᵢ₊₁ᵀfⱼ - fᵢᵀfⱼ
142 this->gamma_(n - i) += (-this->S_(n - i + 1, n - j + 1) + this->S_(n - i + 1, n - j) + this->S_(n - i, n - j + 1) - this->S_(n - i, n - j)) * this->gamma_(n - j);
143 }
144
145 this->gamma_(n - i) /= this->S_(n - i + 1, n - i + 1) - this->S_(n - i + 1, n - i) - this->S_(n - i, n - i + 1) + this->S_(n - i, n - i);
146 }
147
148 this->copy(this->output_history_[idx_step], this->input_);
149
150 if (n > 0) {
151 // first vec is special
152 {
153 int j = this->idx_hist(this->step_ - n);
154 this->axpy(-this->beta_ * this->gamma_(0), this->residual_history_[j], this->input_);
155 this->axpy(-this->gamma_(0), this->output_history_[j], this->input_);
156 }
157
158 for (int i = 1; i < n; ++i) {
159 auto coeff = this->gamma_(n - i - 1) - this->gamma_(n - i);
160 int j = this->idx_hist(this->step_ - i);
161 this->axpy(this->beta_ * coeff, this->residual_history_[j], this->input_);
162 this->axpy(coeff, this->output_history_[j], this->input_);
163 }
164
165 // last vec is special.
166 {
167 int j = this->idx_hist(this->step_);
168 this->axpy(this->beta_ * (this->gamma_(n - 1) + 1), this->residual_history_[j], this->input_);
169 this->axpy(this->gamma_(n - 1), this->output_history_[j], this->input_);
170 }
171 } else {
172 // Linear mixing step.
173 this->axpy(this->beta_, this->residual_history_[idx_step], this->input_);
174 }
175
176 this->copy(this->input_, this->output_history_[idx_next_step]);
177
178 if (n == static_cast<int>(this->max_history_) - 1) {
179 for (int col = 0; col < n; ++col) {
180 for (int row = 0; row < n; ++row) {
181 this->S_(row, col) = this->S_(row + 1, col + 1);
182 }
183 }
184 }
185 }
186
187 private:
188 double beta_;
189 double beta0_;
190 double beta_scaling_factor_;
191 double linear_mix_rmse_tol_;
194};
195} // namespace mixer
196} // namespace sirius
197
198#endif // __MIXER_HPP__
Abstract mixer for variadic number of Function objects, which are described by FunctionProperties.
Definition: mixer.hpp:279
Memory management functions and classes.
Contains definition and implementation of sirius::Mixer base class.
Namespace of the SIRIUS library.
Definition: sirius.f90:5