Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 : Copyright (c) 2016-2021 The VES code team 3 : (see the PEOPLE-VES file at the root of this folder for a list of names) 4 : 5 : See http://www.ves-code.org for more information. 6 : 7 : This file is part of VES code module. 8 : 9 : The VES code module is free software: you can redistribute it and/or modify 10 : it under the terms of the GNU Lesser General Public License as published by 11 : the Free Software Foundation, either version 3 of the License, or 12 : (at your option) any later version. 13 : 14 : The VES code module is distributed in the hope that it will be useful, 15 : but WITHOUT ANY WARRANTY; without even the implied warranty of 16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 : GNU Lesser General Public License for more details. 18 : 19 : You should have received a copy of the GNU Lesser General Public License 20 : along with the VES code module. If not, see <http://www.gnu.org/licenses/>. 21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ 22 : 23 : #include "Optimizer.h" 24 : #include "CoeffsVector.h" 25 : #include "CoeffsMatrix.h" 26 : 27 : #include "core/ActionRegister.h" 28 : #include "core/PlumedMain.h" 29 : 30 : 31 : 32 : namespace PLMD { 33 : namespace ves { 34 : 35 : //+PLUMEDOC VES_OPTIMIZER OPT_AVERAGED_SGD 36 : /* 37 : Averaged stochastic gradient descent with fixed step size. 38 : 39 : ## Algorithm 40 : 41 : This optimizer updates the coefficients according to the averaged stochastic gradient descent algorithm described [here](https://proceedings.neurips.cc/paper_files/paper/2013/file/7fe1f8abaad094e0b5cb1b01d712f708-Paper.pdf). 
This algorithm considers two sets of coefficients, the so-called instantaneous coefficients that are updated according to the recursion formula given by 42 : 43 : $$ 44 : \boldsymbol{\alpha}^{(n+1)} = \boldsymbol{\alpha}^{(n)} - 45 : \mu \left[ 46 : \nabla \Omega(\bar{\boldsymbol{\alpha}}^{(n)}) + 47 : \mathbf{H}(\bar{\boldsymbol{\alpha}}^{(n)}) 48 : [\boldsymbol{\alpha}^{(n)}-\bar{\boldsymbol{\alpha}}^{(n)}] 49 : \right], 50 : $$ 51 : 52 : where $\mu$ is a fixed step size and the gradient $ \nabla\Omega(\bar{\boldsymbol{\alpha}}^{(n)})$ and the Hessian $\mathbf{H}(\bar{\boldsymbol{\alpha}}^{(n)})$ depend on the averaged coefficients defined as 53 : 54 : $$ 55 : \bar{\boldsymbol{\alpha}}^{(n)} = \frac{1}{n+1} \sum_{k=0}^{n} \boldsymbol{\alpha}^{(k)}. 56 : $$ 57 : 58 : This means that the bias acting on the system depends on the averaged coefficients $\bar{\boldsymbol{\alpha}}^{(n)}$ which leads to a smooth convergence of the bias and the estimated free energy surface. Furthermore, this allows for a rather short sampling time for each iteration, for classical MD simulations typical sampling times are on the order of a few ps (around 1000-4000 MD steps). 59 : 60 : Currently it is only supported to employ the diagonal part of the Hessian which is generally sufficient. Support for employing the full Hessian will be added later on. 61 : 62 : The VES bias that is to be optimized should be specified using the 63 : BIAS keyword. 64 : The fixed step size $\mu$ is given using the STEPSIZE keyword. 65 : The frequency of updating the coefficients is given using the 66 : STRIDE keyword where the value is given in the number of MD steps. 67 : For example, if the MD time step is 0.02 ps and STRIDE=2000, the 68 : coefficients will be updated every 40 ps. 69 : The coefficients will be outputted to the file given by the 70 : COEFFS_FILE keyword. How often the coefficients are written 71 : to this file is controlled by the COEFFS_OUTPUT keyword. 
72 : 73 : If the VES bias employs a dynamic target distribution that needs to be 74 : iteratively updated (e.g. [TD_WELLTEMPERED](TD_WELLTEMPERED.md), see the second paper cited below), you will need to specify 75 : the stride for updating the target distribution by using 76 : the TARGETDIST_STRIDE keyword where the stride 77 : is given in terms of coefficient iterations. For example if the 78 : MD time step is 0.02 ps and STRIDE=1000, such that the coefficients 79 : are updated every 2 ps, TARGETDIST_STRIDE=500 will mean that the 80 : target distribution will be updated every 1000 ps. 81 : 82 : The output of the free energy surfaces and biases is controlled by the FES_OUTPUT and the BIAS_OUTPUT 83 : keywords. It is also possible to output one-dimensional projections of the free energy surfaces 84 : by using the FES_PROJ_OUTPUT keyword but for that to work you will need to select 85 : for which argument to do the projections by using the numbered PROJ_ARG keyword in 86 : the VES bias that is optimized. 87 : You can also output dynamic target distributions by using the 88 : TARGETDIST_OUTPUT and TARGETDIST_PROJ_OUTPUT keywords. 89 : 90 : It is possible to start the optimization from some initial set of 91 : coefficients that have been previously obtained by using the INITIAL_COEFFS 92 : keyword. 93 : 94 : When restarting simulations it should be sufficient to put the [RESTART](RESTART.md) action 95 : in the beginning of the input files (for some MD codes PLUMED should automatically 96 : detect if it is a restart run) and keep the same input as before. The restarting of 97 : the optimization should be automatic as the optimizer will then read in the 98 : coefficients from the file given in COEFFS_FILE. For dynamic target 99 : distribution the code will also read in the final target distribution from the 100 : previous run (which is always outputted even if the TARGETDIST_OUTPUT keyword 101 : is not used). 
102 : 103 : This optimizer supports the usage of multiple walkers where different copies of the system share the same bias potential (i.e. coefficients) and cooperatively sample the averages needed for the gradient and Hessian. This can significantly help with convergence in difficult cases. It is of course best to start the different copies from different positions in CV space. To activate this option you just need to add the MULTIPLE_WALKERS flag. Note that this is only supported if the MD code supports running multiple replicas connected via MPI. 104 : 105 : The optimizer supports the usage of a so-called mask file that can be used to employ different step sizes for different coefficients and/or deactivate the optimization of certain coefficients (by putting values of 0.0). The mask file is read in by using the MASK_FILE keyword and should be in the same format as the coefficient file. It is possible to generate a template mask file by using the OUTPUT_MASK_FILE keyword. 106 : 107 : ## Examples 108 : 109 : In the following input we employ an averaged stochastic gradient descent with a 110 : fixed step size of 1.0 and update the coefficients every 1000 MD steps 111 : (e.g. every 2 ps if the MD time step is 0.02 ps). The coefficients are outputted 112 : to the coefficients.data file every 50 iterations while the FES and bias are outputted 113 : to files every 500 iterations (e.g. every 1000 ps). 114 : 115 : ```plumed 116 : phi: TORSION ATOMS=5,7,9,15 117 : 118 : bf1: BF_FOURIER ORDER=5 MINIMUM=-pi MAXIMUM=pi 119 : 120 : VES_LINEAR_EXPANSION ... 121 : ARG=phi 122 : BASIS_FUNCTIONS=bf1 123 : LABEL=ves1 124 : TEMP=300.0 125 : GRID_BINS=100 126 : ... VES_LINEAR_EXPANSION 127 : 128 : OPT_AVERAGED_SGD ... 129 : BIAS=ves1 130 : STRIDE=1000 131 : LABEL=o1 132 : STEPSIZE=1.0 133 : COEFFS_FILE=coefficients.data 134 : COEFFS_OUTPUT=50 135 : FES_OUTPUT=500 136 : BIAS_OUTPUT=500 137 : ... 
OPT_AVERAGED_SGD 138 : ``` 139 : 140 : 141 : In the following example we employ a well-tempered target distribution that 142 : is updated every 500 iterations (e.g. every 1000 ps). The target distribution is 143 : also output to a file every 2000 iterations (the TARGETDIST_OUTPUT keyword). 144 : Here we also employ MULTIPLE_WALKERS flag to enable the usage of 145 : multiple walkers. 146 : 147 : ```plumed 148 : #SETTINGS NREPLICAS=2 149 : phi: TORSION ATOMS=5,7,9,15 150 : psi: TORSION ATOMS=7,9,15,17 151 : 152 : bf1: BF_FOURIER ORDER=5 MINIMUM=-pi MAXIMUM=pi 153 : bf2: BF_FOURIER ORDER=4 MINIMUM=-pi MAXIMUM=pi 154 : 155 : td1: TD_WELLTEMPERED BIASFACTOR=10 156 : 157 : VES_LINEAR_EXPANSION ... 158 : ARG=phi,psi 159 : BASIS_FUNCTIONS=bf1,bf2 160 : LABEL=ves1 161 : TEMP=300.0 162 : GRID_BINS=100,100 163 : TARGET_DISTRIBUTION=td1 164 : PROJ_ARG1=phi 165 : PROJ_ARG2=psi 166 : ... VES_LINEAR_EXPANSION 167 : 168 : OPT_AVERAGED_SGD ... 169 : BIAS=ves1 170 : STRIDE=1000 171 : LABEL=o1 172 : STEPSIZE=1.0 173 : MULTIPLE_WALKERS 174 : COEFFS_FILE=coefficients.data 175 : COEFFS_OUTPUT=50 176 : FES_OUTPUT=500 177 : FES_PROJ_OUTPUT=500 178 : BIAS_OUTPUT=500 179 : TARGETDIST_STRIDE=500 180 : TARGETDIST_OUTPUT=2000 181 : ... 
OPT_AVERAGED_SGD 182 : ``` 183 : 184 : 185 : 186 : */ 187 : //+ENDPLUMEDOC 188 : 189 : class Opt_BachAveragedSGD : public Optimizer { 190 : private: 191 : std::vector<std::unique_ptr<CoeffsVector>> combinedgradient_pntrs_; 192 : unsigned int combinedgradient_wstride_; 193 : std::vector<std::unique_ptr<OFile>> combinedgradientOFiles_; 194 : double decaying_aver_tau_; 195 : private: 196 : CoeffsVector& CombinedGradient(const unsigned int c_id) const { 197 60 : return *combinedgradient_pntrs_[c_id]; 198 : } 199 : double getAverDecay() const; 200 : public: 201 : static void registerKeywords(Keywords&); 202 : explicit Opt_BachAveragedSGD(const ActionOptions&); 203 : void coeffsUpdate(const unsigned int c_id = 0) override; 204 : }; 205 : 206 : 207 : PLUMED_REGISTER_ACTION(Opt_BachAveragedSGD,"OPT_AVERAGED_SGD") 208 : 209 : 210 77 : void Opt_BachAveragedSGD::registerKeywords(Keywords& keys) { 211 77 : Optimizer::registerKeywords(keys); 212 77 : Optimizer::useFixedStepSizeKeywords(keys); 213 77 : Optimizer::useMultipleWalkersKeywords(keys); 214 77 : Optimizer::useHessianKeywords(keys); 215 77 : Optimizer::useMaskKeywords(keys); 216 77 : Optimizer::useRestartKeywords(keys); 217 77 : Optimizer::useMonitorAverageGradientKeywords(keys); 218 77 : Optimizer::useDynamicTargetDistributionKeywords(keys); 219 77 : keys.add("hidden","COMBINED_GRADIENT_FILE","the name of output file for the combined gradient (gradient + Hessian term)"); 220 77 : keys.add("hidden","COMBINED_GRADIENT_OUTPUT","how often the combined gradient should be written to file. This parameter is given as the number of bias iterations. 
It is by default 100 if COMBINED_GRADIENT_FILE is specficed"); 221 77 : keys.add("hidden","COMBINED_GRADIENT_FMT","specify format for combined gradient file(s) (useful for decrease the number of digits in regtests)"); 222 77 : keys.add("optional","EXP_DECAYING_AVER","calculate the averaged coefficients using exponentially decaying averaging using the decaying constant given here in the number of iterations"); 223 : 224 77 : keys.addDOI("10.1021/acs.jctc.5b00076"); 225 77 : } 226 : 227 : 228 : 229 75 : Opt_BachAveragedSGD::Opt_BachAveragedSGD(const ActionOptions&ao): 230 : PLUMED_VES_OPTIMIZER_INIT(ao), 231 75 : combinedgradient_wstride_(100), 232 75 : decaying_aver_tau_(0.0) { 233 75 : log.printf(" Averaged stochastic gradient decent, see and cite "); 234 150 : log << plumed.cite("Bach and Moulines, NIPS 26, 773-781 (2013)"); 235 75 : log.printf("\n"); 236 75 : unsigned int decaying_aver_tau_int=0; 237 75 : parse("EXP_DECAYING_AVER",decaying_aver_tau_int); 238 75 : if(decaying_aver_tau_int>0) { 239 2 : decaying_aver_tau_ = static_cast<double>(decaying_aver_tau_int); 240 2 : log.printf(" Coefficients calculated using an exponentially decaying average with a decaying constant of %u iterations, see and cite ",decaying_aver_tau_int); 241 4 : log << plumed.cite("Invernizzi, Valsson, and Parrinello, Proc. Natl. Acad. Sci. 
USA 114, 3370-3374 (2017)"); 242 2 : log.printf("\n"); 243 : } 244 : // 245 : std::vector<std::string> combinedgradient_fnames; 246 75 : parseFilenames("COMBINED_GRADIENT_FILE",combinedgradient_fnames); 247 150 : parse("COMBINED_GRADIENT_OUTPUT",combinedgradient_wstride_); 248 75 : setupOFiles(combinedgradient_fnames,combinedgradientOFiles_,useMultipleWalkers()); 249 75 : std::string combinedgradient_fmt=""; 250 150 : parse("COMBINED_GRADIENT_FMT",combinedgradient_fmt); 251 75 : if(combinedgradient_fnames.size()>0) { 252 12 : for(unsigned int i=0; i<numberOfCoeffsSets(); i++) { 253 12 : auto combinedgradient_tmp = Tools::make_unique<CoeffsVector>(*getGradientPntrs()[i]); 254 6 : std::string label = getGradientPntrs()[i]->getLabel(); 255 6 : if(label.find("gradient")!=std::string::npos) { 256 12 : label.replace(label.find("gradient"), std::string("gradient").length(), "combined_gradient"); 257 : } else { 258 : label += "_combined"; 259 : } 260 6 : combinedgradient_tmp->setLabels(label); 261 6 : if(combinedgradient_fmt.size()>0) { 262 : combinedgradient_tmp->setOutputFmt(combinedgradient_fmt); 263 : } 264 6 : combinedgradient_pntrs_.emplace_back(std::move(combinedgradient_tmp)); 265 6 : } 266 : // 267 6 : if(numberOfCoeffsSets()==1) { 268 12 : log.printf(" Combined gradient (gradient + Hessian term) will be written out to file %s every %u iterations\n",combinedgradientOFiles_[0]->getPath().c_str(),combinedgradient_wstride_); 269 : } else { 270 0 : log.printf(" Combined gradient (gradient + Hessian term) will be written out to the following files every %u iterations:\n",combinedgradient_wstride_); 271 0 : for(unsigned int i=0; i<combinedgradientOFiles_.size(); i++) { 272 0 : log.printf(" coefficient set %u: %s\n",i,combinedgradientOFiles_[i]->getPath().c_str()); 273 : } 274 : } 275 : } 276 : // 277 : 278 75 : turnOnHessian(); 279 75 : checkRead(); 280 75 : } 281 : 282 : 283 22675 : void Opt_BachAveragedSGD::coeffsUpdate(const unsigned int c_id) { 284 : // 285 22675 : 
if(combinedgradientOFiles_.size()>0 && (getIterationCounter()+1)%combinedgradient_wstride_==0) { 286 60 : CombinedGradient(c_id).setValues( ( Gradient(c_id) + Hessian(c_id)*(AuxCoeffs(c_id)-Coeffs(c_id)) ) ); 287 60 : combinedgradient_pntrs_[c_id]->setIterationCounterAndTime(getIterationCounter()+1,getTime()); 288 60 : combinedgradient_pntrs_[c_id]->writeToFile(*combinedgradientOFiles_[c_id]); 289 : } 290 : // 291 : double aver_decay = getAverDecay(); 292 22675 : AuxCoeffs(c_id) += - StepSize(c_id)*CoeffsMask(c_id) * ( Gradient(c_id) + Hessian(c_id)*(AuxCoeffs(c_id)-Coeffs(c_id)) ); 293 : //AuxCoeffs() = AuxCoeffs() - StepSize() * ( Gradient() + Hessian()*(AuxCoeffs()-Coeffs()) ); 294 22675 : Coeffs(c_id) += aver_decay * ( AuxCoeffs(c_id)-Coeffs(c_id) ); 295 22675 : } 296 : 297 : 298 : inline 299 : double Opt_BachAveragedSGD::getAverDecay() const { 300 22675 : double aver_decay = 1.0 / ( getIterationCounterDbl() + 1.0 ); 301 22675 : if(decaying_aver_tau_ > 0.0 && (getIterationCounterDbl() + 1.0) > decaying_aver_tau_) { 302 14 : aver_decay = 1.0 / decaying_aver_tau_; 303 : } 304 : return aver_decay; 305 : } 306 : 307 : 308 : } 309 : }