SIRIUS 7.5.0
Electronic structure library and applications
sqnm.hpp
/**
 * @file sqnm.hpp
 * @author Moritz Gubler (moritz.gubler@unibas.ch)
 * @brief Implementation of the SQNM method. More information about the algorithm can be found here: https://aip.scitation.org/doi/10.1063/1.4905665
 * @date 2022-07-13
 *
 */

#ifndef SQNM_HPP
#define SQNM_HPP
#include <Eigen/Dense>
#include <iostream>
#include <memory>
#include "historylist.hpp"

namespace vcsqnm {

namespace sqnm_space
{
  class SQNM {
    private:
    int ndim;
    int nhistx;
    double eps_subsp = 1.e-3;
    double alpha0 = 1.e-2;
    std::unique_ptr<hlist_space::HistoryList> xlist;
    std::unique_ptr<hlist_space::HistoryList> flist;
    double alpha;
    Eigen::VectorXd dir_of_descent;
    double prev_f;
    Eigen::VectorXd prev_df_dx;
    Eigen::VectorXd expected_positions;
    Eigen::MatrixXd h_subsp;
    Eigen::MatrixXd h_evec_subsp;
    Eigen::MatrixXd h_evec;
    Eigen::VectorXd h_eval;
    Eigen::VectorXd res;
    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> esolve;
    Eigen::VectorXd res_temp;
    int nhist = 0;
    bool estimate_step_size = false;

    public:
    /**
     * @brief Construct a new SQNM::SQNM object using default parameters.
     *
     * @param ndim_ Number of dimensions of the target function.
     * @param nhistx_ Maximal number of steps that will be stored in the history list. Use a value between 3 and 20. Must be <= ndim_.
     * @param alpha_ Initial step size. A typical value is 1.0. For systems with hard bonds (e.g. C-C) use a value between 1.0 and 2.5.
     * It should be approximately the inverse of the largest eigenvalue of the Hessian matrix.
     * If alpha is negative, the initial step size is estimated using the mechanism from section 6.4 of the
     * vc-sqnm paper: https://arxiv.org/abs/2206.07339
     * beta will then be equal to minus alpha. Good choices for beta are 0.1 in hartree / bohr^2 and
     * 0.001 in eV / A^2.
     */
    SQNM(int ndim_, int nhistx_, double alpha_) {
      ndim = ndim_;
      nhistx = nhistx_;
      if (alpha_ <= 0)
      {
        this->estimate_step_size = true;
        this->alpha = -alpha_;
      } else
      {
        alpha = alpha_;
      }

      xlist = std::make_unique<hlist_space::HistoryList>(ndim, nhistx);
      flist = std::make_unique<hlist_space::HistoryList>(ndim, nhistx);
    }

    /**
     * @brief Construct a new SQNM::SQNM object using custom parameters.
     *
     * @param ndim_ Number of dimensions of the target function.
     * @param nhistx_ Maximal number of steps that will be stored in the history list. Use a value between 3 and 20. Must be <= ndim_.
     * @param alpha_ Initial step size. A typical value is 1.0. For systems with hard bonds (e.g. C-C) use a value between 1.0 and 2.5.
     * It should be approximately the inverse of the largest eigenvalue of the Hessian matrix.
     * If alpha is negative, the initial step size is estimated using the mechanism from section 6.4 of the
     * vc-sqnm paper: https://arxiv.org/abs/2206.07339
     * beta will then be equal to minus alpha. Good choices for beta are 0.1 in hartree / bohr^2 and
     * 0.001 in eV / A^2.
     * @param alpha0_ Lower limit on the step size. 1.e-2 is the default.
     * @param eps_subsp_ Lower limit on linear dependencies of basis vectors in the history list. 1.e-3 is the default.
     */
    SQNM(int ndim_, int nhistx_, double alpha_, double alpha0_, double eps_subsp_) {
      ndim = ndim_;
      nhistx = nhistx_;
      if (alpha_ <= 0)
      {
        this->estimate_step_size = true;
        this->alpha = -alpha_;
      } else
      {
        alpha = alpha_;
      }
      xlist = std::make_unique<hlist_space::HistoryList>(ndim, nhistx);
      flist = std::make_unique<hlist_space::HistoryList>(ndim, nhistx);
      alpha0 = alpha0_;
      eps_subsp = eps_subsp_;
    }

    /**
     * @brief Calculates new coordinates that are closer to a local minimum than the current coordinates. This function should be used the following way:
     * 1. Calculate f(x) and its derivative.
     * 2. Call the step function.
     * 3. Add the return value of the step function to x.
     * 4. Repeat.
     *
     * @param x Current position vector.
     * @param f_of_x Value of the target function evaluated at position x.
     * @param df_dx Derivative of the target function evaluated at x.
     * @return VectorXd Displacement that can be added to x in order to get new, improved coordinates.
     */
    Eigen::VectorXd step(Eigen::VectorXd &x, double &f_of_x, Eigen::VectorXd &df_dx) {

      // Check if forces are zero. If so, zero is returned because a local minimum has already been found.
      if (df_dx.norm() <= 10.0e-13)
      {
        this->dir_of_descent.setZero(ndim);
        return this->dir_of_descent;
      }

      nhist = xlist->add(x);
      flist->add(df_dx);
      if (nhist == 0) { // initial and first step
        this->dir_of_descent = - alpha * df_dx;
      } else
      {
        // Check if the positions have been changed and print a warning if they were.
        if ((x - expected_positions).norm() > 10e-8)
        {
          std::cerr << "SQNM was not called with the positions that were expected. If this was not done on purpose, it is probably a bug.\n";
          std::cerr << "Were atoms that left the simulation box put back into the cell? This is not allowed.\n";
        }

        if (this->estimate_step_size)
        {
          double prev_df_squared = std::pow(prev_df_dx.norm(), 2);
          double l1 = (f_of_x - prev_f + alpha * prev_df_squared) / (0.5 * alpha * alpha * prev_df_squared);
          double l2 = (df_dx - prev_df_dx).norm() / (alpha * prev_df_dx.norm());
          alpha = 1.0 / std::max(l1, l2);
          std::cout << "Automatic initial step size guess: " << alpha << '\n';
          this->estimate_step_size = false;
        } else
        {
          double gainratio = calc_gainratio(f_of_x);
          adjust_stepsize(gainratio);
        }

        Eigen::MatrixXd S = calc_ovrlp();
        esolve.compute(S);
        Eigen::VectorXd S_eval = esolve.eigenvalues();
        Eigen::MatrixXd S_evec = esolve.eigenvectors();

        // compute eq. 8
        int dim_subsp = 0;
        for (int i = 0; i < S_eval.size(); i++){
          if (S_eval(i) / S_eval(nhist-1) > eps_subsp)
          {
            dim_subsp+=1;
          }
        }
        // remove dimensions from subspace
        for (int i = 0; i < dim_subsp; i++)
        {
          S_evec.col(i) = S_evec.col(nhist - dim_subsp + i);
          S_eval(i) = S_eval(nhist - dim_subsp + i);
        }

        Eigen::MatrixXd dr_subsp(ndim, dim_subsp);
        dr_subsp.setZero();
        for (int i = 0; i < dim_subsp; i++) {
          for (int ihist = 0; ihist < nhist; ihist++){
            dr_subsp.col(i) += S_evec(ihist, i) * xlist->normalized_difflist.col(ihist);
          }
          dr_subsp.col(i) /= sqrt(S_eval(i));
        }

        // compute eq. 11
        Eigen::MatrixXd df_subsp(ndim, dim_subsp);
        df_subsp.setZero();
        for (int i = 0; i < dim_subsp; i++) {
          for (int ihist = 0; ihist < nhist; ihist++){
            df_subsp.col(i) += S_evec(ihist, i) * flist->difflist.col(ihist) / xlist->difflist.col(ihist).norm();
          }
          df_subsp.col(i) /= sqrt(S_eval(i));
        }
        // compute eq. 13
        h_subsp = .5 * (df_subsp.transpose() * dr_subsp + dr_subsp.transpose() * df_subsp);
        esolve.compute(h_subsp);
        h_eval = esolve.eigenvalues();
        h_evec_subsp = esolve.eigenvectors();

        // compute eq. 15
        h_evec.resize(ndim, dim_subsp);
        h_evec.setZero();
        for (int i = 0; i < dim_subsp; i++){
          for (int k = 0; k < dim_subsp; k++){
            h_evec.col(i) += h_evec_subsp(k, i) * dr_subsp.col(k);
          }
        }

        // compute residues (eq. 20)
        res.resize(dim_subsp);
        for (int j = 0; j < dim_subsp; j++){
          res_temp = - h_eval(j) * h_evec.col(j);
          for (int k = 0; k < dim_subsp; k++){
            res_temp += h_evec_subsp(k, j) * df_subsp.col(k);
          }
          res(j) = res_temp.norm();
        }

        // modify eigenvalues (eq. 18)
        for (int idim = 0; idim < dim_subsp; idim++){
          h_eval(idim) = sqrt(pow(h_eval(idim), 2) + pow(res(idim), 2));
        }

        // decompose gradient (eq. 16)
        dir_of_descent = df_dx;
        for (int i = 0; i < dim_subsp; i++){
          dir_of_descent -= h_evec.col(i).dot(df_dx) * h_evec.col(i);
        }
        dir_of_descent *= alpha;

        // apply preconditioning to the subspace gradient (eq. 21)
        for (int idim = 0; idim < dim_subsp; idim++)
        {
          dir_of_descent += (df_dx.dot(h_evec.col(idim)) / h_eval(idim)) * h_evec.col(idim);
        }
        dir_of_descent *= -1.0;

      }
      expected_positions = x + dir_of_descent;
      prev_f = f_of_x;
      prev_df_dx = df_dx;
      return this->dir_of_descent;
    }

    /**
     * @brief Estimates a lower bound of the energy of the local minimum
     *
     * @return double Lower bound estimate
     */
    double lower_bound()
    {
      if (this->nhist == 0) {
        std::cout << "No lower bound estimate can be given yet.";
        return 0.0;
      }
      return this->prev_f - 0.5 * this->prev_df_dx.dot(this->prev_df_dx) / this->h_eval(0);
    }

    private:
    // Gain ratio: actual change of the objective divided by the change predicted from the previous displacement and gradient.
    double calc_gainratio(double &f){
      double gr = (f - prev_f) / ( .5 * this->dir_of_descent.dot(prev_df_dx));
      return gr;
    }

    // Shrink the step size (but never below alpha0) when the gain ratio is poor, enlarge it slightly when it is good.
    void adjust_stepsize(double &gainratio){
      if ( gainratio < 0.5 ) alpha = std::max(alpha * 0.65, alpha0);
      else if(gainratio > 1.05) alpha = alpha * 1.05;
    }

    // Overlap matrix of the normalized position differences stored in the history list.
    Eigen::MatrixXd calc_ovrlp(){
      Eigen::MatrixXd S = xlist->normalized_difflist.block(0,0, ndim, nhist).transpose() * xlist->normalized_difflist.block(0,0, ndim, nhist);
      return S;
    }
  };
}

}
#endif
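
The documentation of step() above prescribes a simple driver loop: evaluate f(x) and its derivative, call step(), add the returned displacement to x, and repeat. The listing below is a minimal, self-contained sketch of such a loop against this header. It assumes sqnm.hpp, historylist.hpp and Eigen are on the include path; the quadratic test function, the history length, the initial step size and the stopping tolerance are illustrative choices, not values taken from the SIRIUS sources.

// Usage sketch (not part of sqnm.hpp): minimize a simple quadratic function
// with SQNM, following the four-step loop described in the step() documentation.
#include <Eigen/Dense>
#include <iostream>
#include "sqnm.hpp"

int main()
{
  const int ndim = 10;

  // Strictly convex test problem: f(x) = 0.5 * sum_i d_i * x_i^2 with gradient d_i * x_i.
  Eigen::VectorXd d = Eigen::VectorXd::LinSpaced(ndim, 1.0, 5.0);
  auto f  = [&](const Eigen::VectorXd &y) { return 0.5 * y.dot(d.cwiseProduct(y)); };
  auto df = [&](const Eigen::VectorXd &y) { return Eigen::VectorXd(d.cwiseProduct(y)); };

  Eigen::VectorXd x = Eigen::VectorXd::Constant(ndim, 1.0);

  // nhistx = 5 and alpha = 0.2 (roughly the inverse of the largest Hessian
  // eigenvalue of this test function) are reasonable but arbitrary choices.
  vcsqnm::sqnm_space::SQNM optimizer(ndim, 5, 0.2);

  for (int iter = 0; iter < 100; iter++) {
    double f_of_x = f(x);                    // 1. evaluate the target function
    Eigen::VectorXd df_dx = df(x);           //    and its derivative at x
    if (df_dx.norm() < 1.e-10) break;        // stop once the gradient is small
    x += optimizer.step(x, f_of_x, df_dx);   // 2./3. call step() and add the displacement to x
  }                                          // 4. repeat

  std::cout << "f at the final point:  " << f(x) << '\n';
  std::cout << "estimated lower bound: " << optimizer.lower_bound() << '\n';
  return 0;
}

Passing a negative alpha_ to the constructor would instead trigger the automatic initial step size estimate described in the constructor documentation, with beta equal to minus alpha_.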