LCOV - code coverage report
Current view: top level - ves - Opt_Adam.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 67 72 93.1 %
Date: 2025-12-04 11:19:34 Functions: 3 4 75.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2016-2021 The VES code team
       3             :    (see the PEOPLE-VES file at the root of this folder for a list of names)
       4             : 
       5             :    See http://www.ves-code.org for more information.
       6             : 
       7             :    This file is part of VES code module.
       8             : 
       9             :    The VES code module is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    The VES code module is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with the VES code module.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : 
#include "Optimizer.h"
#include "CoeffsVector.h"

#include "core/ActionRegister.h"
#include "core/ActionSet.h"

#include <cmath>
#include <memory>
#include <vector>
      28             : 
      29             : 
      30             : namespace PLMD {
      31             : namespace ves {
      32             : 
      33             : //+PLUMEDOC VES_OPTIMIZER OPT_ADAM
      34             : /*
      35             : Adaptive moment estimation (ADAM) optimizer.
      36             : 
      37             : !!! attention ""
      38             : 
       39             :     __This optimizer is still experimental and not fully documented. The syntax might change. Restarting does not work. We recommend using the averaged stochastic gradient descent optimizer ([OPT_AVERAGED_SGD](OPT_AVERAGED_SGD.md)) for now__.
      40             : 
      41             : */
      42             : //+ENDPLUMEDOC
      43             : 
      44             : class Opt_Adam: public Optimizer {
      45             : private:
      46             :   unsigned int time_;
      47             :   double beta_1_;
      48             :   double beta_2_;
      49             :   double epsilon_;
      50             :   double one_minus_weight_decay_;
      51             :   bool amsgrad_;
      52             :   bool adamw_;
      53             :   // 1st gradient moment uses the "AuxCoeffs", so only 2nd moment needs new CoeffVectors
      54             :   std::vector<std::unique_ptr<CoeffsVector>> var_coeffs_pntrs_;
      55             :   // used only for AMSGrad variant
      56             :   std::vector<std::unique_ptr<CoeffsVector>> varmax_coeffs_pntrs_;
      57             : protected:
      58             :   CoeffsVector& VarCoeffs(const unsigned int coeffs_id = 0) const;
      59             :   CoeffsVector& VarmaxCoeffs(const unsigned int coeffs_id = 0) const;
      60             : public:
      61             :   static void registerKeywords(Keywords&);
      62             :   explicit Opt_Adam(const ActionOptions&);
      63             :   void coeffsUpdate(const unsigned int c_id = 0) override;
      64             : };
      65             : 
      66             : inline
      67             : CoeffsVector& Opt_Adam::VarCoeffs(const unsigned int coeffs_id) const {
      68             :   return *var_coeffs_pntrs_[coeffs_id];
      69             : }
      70             : 
      71             : inline
      72             : CoeffsVector& Opt_Adam::VarmaxCoeffs(const unsigned int coeffs_id) const {
      73             :   return *varmax_coeffs_pntrs_[coeffs_id];
      74             : }
      75             : 
      76             : 
      77             : PLUMED_REGISTER_ACTION(Opt_Adam,"OPT_ADAM")
      78             : 
      79             : 
      80           4 : void Opt_Adam::registerKeywords(Keywords& keys) {
      81           4 :   Optimizer::registerKeywords(keys);
      82           4 :   Optimizer::useFixedStepSizeKeywords(keys);
      83           4 :   Optimizer::useMultipleWalkersKeywords(keys);
      84           4 :   Optimizer::useMaskKeywords(keys);
      85           4 :   Optimizer::useDynamicTargetDistributionKeywords(keys);
      86           4 :   keys.add("optional","BETA_1","Parameter for the first moment estimate. Defaults to 0.9");
      87           4 :   keys.add("optional","BETA_2","Parameter for the second moment estimate. Defaults to 0.999");
      88           4 :   keys.add("optional","EPSILON","Small parameter to avoid division by zero. Defaults to 1e-8");
      89           4 :   keys.add("optional","ADAMW_WEIGHT_DECAY","Weight decay parameter for the AdamW variant. Defaults to 0");
      90           4 :   keys.addFlag("AMSGRAD", false, "Use the AMSGrad variant");
      91           4 : }
      92             : 
      93             : 
      94           2 : Opt_Adam::Opt_Adam(const ActionOptions&ao):
      95             :   PLUMED_VES_OPTIMIZER_INIT(ao),
      96           2 :   time_(0),
      97           2 :   beta_1_(0.9),
      98           2 :   beta_2_(0.999),
      99           2 :   epsilon_(0.00000001),
     100           2 :   one_minus_weight_decay_(1.0),
     101           2 :   amsgrad_(false),
     102           2 :   adamw_(false),
     103           2 :   var_coeffs_pntrs_(0) {
     104             :   // add citation and print it to log
     105           2 :   log << "  Adam type stochastic gradient decent\n";
     106           2 :   parseFlag("AMSGRAD",amsgrad_);
     107           2 :   if (amsgrad_) {
     108           1 :     log << "  Using the AMSGrad variant of the Adam algorithm, see and cite\n";
     109             :   }
     110             : 
     111           2 :   double tmp_weight_decay = 0.0;
     112           2 :   parse("ADAMW_WEIGHT_DECAY",tmp_weight_decay);
     113           2 :   if (tmp_weight_decay != 0.0) {
     114           0 :     adamw_ = true;
     115           0 :     log << "  Using the AdamW variant (Adam with weight decay), see and cite\n";
     116           0 :     one_minus_weight_decay_ = 1 - tmp_weight_decay;
     117           0 :     log << "    weight decay parameter: " << tmp_weight_decay << "\n";
     118             :   }
     119             : 
     120           2 :   log << "  Adam parameters:\n";
     121           2 :   parse("BETA_1",beta_1_);
     122           2 :   plumed_massert(beta_1_ > 0 && beta_1_ <= 1, "BETA_1 must be between 0 and 1");
     123           2 :   log << "    beta_1: " << beta_1_ << "\n";
     124             : 
     125           2 :   parse("BETA_2",beta_2_);
     126           2 :   plumed_massert(beta_2_ > 0 && beta_2_ <= 1, "BETA_2 must be between 0 and 1");
     127           2 :   log << "    beta_2: " << beta_2_ << "\n";
     128             : 
     129           2 :   parse("EPSILON",epsilon_);
     130           2 :   plumed_massert(epsilon_ > 0 && epsilon_ <= 1, "EPSILON must be between 0 and 1");
     131           2 :   log << "    epsilon: " << epsilon_ << "\n";
     132             : 
     133             : 
     134             :   // set up the coeff vector for the 2nd moment of the gradient (variance)
     135           4 :   for (unsigned i = 0; i < numberOfCoeffsSets(); ++i) {
     136           2 :     var_coeffs_pntrs_.emplace_back(std::unique_ptr<CoeffsVector>(new CoeffsVector(Coeffs(i))));
     137           4 :     VarCoeffs(i).replaceLabelString("coeffs","grad_var");
     138           2 :     VarCoeffs(i).setAllValuesToZero(); // can Coeffs(i) even be non-zero at this point?
     139             : 
     140             :     // add second set of coefficients to store the maximum values of the 2nd moment
     141           2 :     if (amsgrad_) {
     142           1 :       varmax_coeffs_pntrs_.emplace_back(std::unique_ptr<CoeffsVector>(new CoeffsVector(VarCoeffs(i))));
     143           2 :       VarmaxCoeffs(i).replaceLabelString("coeffs","grad_varmax");
     144             :     }
     145             : 
     146             :     // also rename the Coeffs used for the mean of the gradient
     147           4 :     AuxCoeffs(i).replaceLabelString("coeffs","grad_mean");
     148             :   }
     149             : 
     150           2 :   checkRead();
     151           2 : }
     152             : 
     153             : 
     154          20 : void Opt_Adam::coeffsUpdate(const unsigned int c_id) {
     155          20 :   time_++;
     156             :   // AuxCoeffs is used for first moment (mean)
     157          20 :   AuxCoeffs(c_id) *= beta_1_;
     158          20 :   AuxCoeffs(c_id) += (1 - beta_1_ ) * Gradient(c_id) * CoeffsMask(c_id);
     159          20 :   VarCoeffs(c_id) *= beta_2_;
     160          20 :   VarCoeffs(c_id) += (1 - beta_2_ ) * Gradient(c_id) * Gradient(c_id) * CoeffsMask(c_id);
     161             : 
     162          20 :   if (amsgrad_) {
     163         120 :     for (size_t i = 0; i< VarCoeffs(c_id).getSize(); ++i) {
     164         110 :       if (VarCoeffs(c_id).getValue(i) > VarmaxCoeffs(c_id).getValue(i)) {
     165          95 :         VarmaxCoeffs(c_id)[i] = VarCoeffs(c_id).getValue(i);
     166             :       }
     167             :     }
     168             :   }
     169             : 
     170             :   // store sqrt of VarCoeffs in vector, easier than writing a CoeffsVector::sqrt() function
     171             :   // also directly add epsilon and invert to multiply with the Coeffs in last step
     172             :   std::vector<double> var_coeffs_sqrt;
     173          20 :   if (!amsgrad_) {
     174         120 :     for (size_t i = 0; i< VarCoeffs(c_id).getSize(); ++i) {
     175         110 :       var_coeffs_sqrt.push_back(1 / (sqrt(VarCoeffs(c_id).getValue(i)) + epsilon));
     176             :     }
     177             :   } else { // use VarmaxCoffs instead of VarCoeffs
     178         120 :     for (size_t i = 0; i< VarmaxCoeffs(c_id).getSize(); ++i) {
     179         110 :       var_coeffs_sqrt.push_back(1 / (sqrt(VarmaxCoeffs(c_id).getValue(i)) + epsilon));
     180             :     }
     181             :   }
     182             : 
     183             :   // bias correction
     184          20 :   double scalefactor = StepSize(c_id) * sqrt(1 - pow(beta_2_, time_)) / (1 - pow(beta_1_, time_));
     185             : 
     186          20 :   if (adamw_) { // check is not necessary but probably faster than always multiplying by 1
     187           0 :     Coeffs(c_id) *= one_minus_weight_decay_ * CoeffsMask(c_id);
     188             :   }
     189             : 
     190             :   // coeff update
     191          20 :   Coeffs(c_id) -= scalefactor * AuxCoeffs(c_id) * var_coeffs_sqrt * CoeffsMask(c_id);
     192          20 : }
     193             : 
     194             : 
     195             : }
     196             : }

Generated by: LCOV version 1.16