LCOV - code coverage report
Current view: top level - isdb - bAIes.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 9 122 7.4 %
Date: 2025-12-04 11:19:34 Functions: 1 11 9.1 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2024 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : 
      23             : #include "colvar/Colvar.h"
      24             : #include "core/ActionRegister.h"
      25             : #include "core/PlumedMain.h"
      26             : #include "tools/Communicator.h"
      27             : #include "tools/Matrix.h"
      28             : #include "core/GenericMolInfo.h"
      29             : #include "core/ActionSet.h"
      30             : #include "tools/File.h"
      31             : #include "tools/OpenMP.h"
      32             : #include <string>
      33             : #include <cmath>
      34             : #include <map>
      35             : #include <ctime>
      36             : #include "tools/Random.h"
      37             : 
      38             : #ifndef M_PI
      39             : //why do not use PLMD::pi?
      40             : #define M_PI           3.14159265358979323846
      41             : #endif
      42             : 
      43             : namespace PLMD {
      44             : namespace isdb {
      45             : 
      46             : //+PLUMEDOC ISDB_COLVAR BAIES
      47             : /*
      48             : Bayesian refinement of AF models.
      49             : 
      50             : This action implements the Bayesian approach to refine AF models introduced here and here.
      51             : It can be used to generate conformational ensembles of IDPs or refine AF models prior to small-molecule virtual screening.
      52             : 
      53             : ## Examples
      54             : 
      55             : Complete tutorials can be found <a href="https://github.com/COSBlab/bAIes-IDP">here</a> and here.
      56             : 
      57             : */
      58             : //+ENDPLUMEDOC
      59             : 
      60             : class BAIES : public Colvar {
      61             : 
      62             : private:
      63             : 
      64             : // bool pbc
      65             :   bool pbc_;
      66             : // temperature in kbt
      67             :   double kbt_;
      68             : // number of threads
      69             :   unsigned nt_;
      70             : // positions
      71             :   std::vector<Vector> pos_;
      72             : // derivatives
      73             :   std::vector<Vector> atom_der_;
      74             : // AF2 parameters variables
      75             :   std::string mtype_;
      76             :   std::vector < std::pair<unsigned, unsigned> > atom_pairs_;
      77             :   std::vector<double> mus_;
      78             :   std::vector<double> sigmas_;
      79             : // constants
      80             :   double inv_sqrt2_, sqrt2_pi_, inv_pi2_;
      81             : // prior type
      82             :   unsigned prior_;
      83             :   enum { NONE, JEFFREYS, CAUCHY };
      84             : // private functions
      85             :   void read_data_file(std::string datafile, std::vector<AtomNumber> atoms, double smin);
      86             : // energie
      87             :   double getGauss();
      88             :   double getGaussJeffreys();
      89             :   double getGaussCauchy();
      90             :   double getLognorm();
      91             :   double getLognormJeffreys();
      92             :   double getLognormCauchy();
      93             : 
      94             : public:
      95             :   static void registerKeywords( Keywords& keys );
      96             :   explicit BAIES(const ActionOptions&);
      97             : // active methods:
      98             :   void calculate() override;
      99             : };
     100             : 
     101             : PLUMED_REGISTER_ACTION(BAIES,"BAIES")
     102             : 
     103           2 : void BAIES::registerKeywords( Keywords& keys ) {
     104           2 :   Colvar::registerKeywords( keys );
     105           2 :   keys.add("atoms","ATOMS","atoms used in the calculation of bAIes energy");
     106           2 :   keys.add("compulsory","DATA_FILE","file with AF2 fit parameters");
     107           2 :   keys.add("compulsory","PRIOR", "type of prior to use (NONE, JEFFREYS, CAUCHY");
     108           2 :   keys.add("optional","TEMP", "temperature in kBt units");
     109           2 :   keys.add("optional","SIGMA_MIN", "minimum value of sigma");
     110           4 :   keys.addOutputComponent("ene","default","scalar","Bayesian bAIes energy");
     111           2 : }
     112             : 
     113           0 : BAIES::BAIES(const ActionOptions&ao):
     114             :   PLUMED_COLVAR_INIT(ao),
     115           0 :   pbc_(true) {
     116             :   // set constants
     117           0 :   inv_sqrt2_ = 1.0/sqrt(2.0);
     118           0 :   sqrt2_pi_  = sqrt(2.0 / M_PI);
     119           0 :   inv_pi2_   = 0.5 / M_PI / M_PI;
     120             : 
     121             :   // list of atoms
     122             :   std::vector<AtomNumber> atoms;
     123           0 :   parseAtomList("ATOMS", atoms);
     124             : 
     125             :   // file with AF2 fit parameters
     126             :   std::string datafile;
     127           0 :   parse("DATA_FILE", datafile);
     128             : 
     129             :   // prior type
     130             :   std::string prior;
     131           0 :   parse("PRIOR", prior);
     132             :   // check priors allowed
     133           0 :   if(prior=="NONE") {
     134           0 :     prior_ = NONE;
     135           0 :   } else if(prior=="JEFFREYS") {
     136           0 :     prior_ = JEFFREYS;
     137           0 :   } else if(prior=="CAUCHY") {
     138           0 :     prior_ = CAUCHY;
     139             :   } else {
     140           0 :     error("Unknown PRIOR type!");
     141             :   }
     142             : 
     143           0 :   bool nopbc=!pbc_;
     144           0 :   parseFlag("NOPBC",nopbc);
     145           0 :   pbc_=!nopbc;
     146             : 
     147             :   // set kbt
     148             :   //kbt_ = plumed.getAtoms().getKbT();
     149           0 :   kbt_ = getkBT();
     150           0 :   parse("TEMP", kbt_);
     151             : 
     152             :   // set sigma_min
     153           0 :   double sigma_min = -1.0;
     154           0 :   parse("SIGMA_MIN",sigma_min);
     155             : 
     156             :   // check read
     157           0 :   checkRead();
     158             : 
     159             :   // set parallel stuff
     160           0 :   unsigned mpisize=comm.Get_size();
     161           0 :   if(mpisize>1) {
     162           0 :     error("BAIES supports only OpenMP parallelization");
     163             :   }
     164             :   // set number of OpenMP threads
     165           0 :   nt_ = OpenMP::getNumThreads();
     166             : 
     167             :   // read the data file
     168           0 :   read_data_file(datafile, atoms, sigma_min);
     169             : 
     170             :   // print stuff to log
     171           0 :   log.printf("  number of atoms involved : %u\n", atoms.size());
     172           0 :   log.printf("  AF2 fit parameters file : %s\n", datafile.c_str());
     173           0 :   log.printf("  prior type : %s\n", prior.c_str());
     174             :   // print stuff from data file
     175           0 :   log.printf("  noise model type : %s\n", mtype_.c_str());
     176           0 :   log.printf("  number of pairs involved : %u\n", atom_pairs_.size());
     177             :   // other info
     178           0 :   log.printf("  temperature of the system in energy unit : %f\n", kbt_);
     179           0 :   log.printf("  minimum value of sigma : %f\n", sigma_min);
     180           0 :   if(pbc_) {
     181           0 :     log.printf("  using periodic boundary conditions\n");
     182             :   } else {
     183           0 :     log.printf("  without periodic boundary conditions\n");
     184             :   }
     185             : 
     186             :   // prepare other vectors: data and derivatives
     187           0 :   atom_der_.resize(atoms.size());
     188             : 
     189             :   // add components
     190           0 :   addComponentWithDerivatives("ene");
     191           0 :   componentIsNotPeriodic("ene");
     192             : 
     193             :   // request atoms
     194           0 :   requestAtoms(atoms);
     195             : 
     196             :   // print bibliography
     197           0 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     198           0 :   log<<plumed.cite("Schnapka, Morozova, Sen, Bonomi, bioRxiv (2025) doi: XXX");
     199           0 :   log<<"\n";
     200           0 : }
     201             : 
     202           0 : void BAIES::read_data_file(const std::string datafile, const std::vector<AtomNumber> atoms, double smin) {
     203             :   unsigned id, ai, aj;
     204             :   double mu, sigma;
     205             : // map serials to index in position array
     206             :   std::map< unsigned, unsigned > index;
     207           0 :   for(unsigned i=0; i<atoms.size(); ++i) {
     208           0 :     index[atoms[i].serial()]=i;
     209             :   }
     210             : // open file
     211           0 :   IFile *ifile = new IFile();
     212           0 :   if(ifile->FileExist(datafile)) {
     213           0 :     ifile->open(datafile);
     214             :     // read constant fields
     215           0 :     if( ifile->FieldExist("model") ) {
     216           0 :       ifile->scanField("model",mtype_);
     217           0 :       if( mtype_!="gaussian" && mtype_!="lognormal" ) {
     218           0 :         error("Unknown noise model type");
     219             :       }
     220             :     } else {
     221           0 :       error("Missing noise model type in DATA_FILE");
     222             :     }
     223             :     // read line-by-line
     224           0 :     while(ifile->scanField("Id",id)) {
     225           0 :       ifile->scanField("atom_i",ai);
     226           0 :       ifile->scanField("atom_j",aj);
     227           0 :       ifile->scanField("mu",mu);
     228           0 :       ifile->scanField("sigma",sigma);
     229             :       // list of pairs of atoms
     230           0 :       atom_pairs_.push_back(std::make_pair(index[ai],index[aj]));
     231             :       // list of mus
     232           0 :       mus_.push_back(mu);
     233             :       // list of sigmas
     234           0 :       sigmas_.push_back(std::max(smin,sigma));
     235             :       // new line
     236           0 :       ifile->scanField();
     237             :     }
     238           0 :     ifile->close();
     239             :   } else {
     240           0 :     error("Cannot find DATA_FILE "+datafile+"\n");
     241             :   }
     242           0 :   delete ifile;
     243           0 : }
     244             : 
     245             : // Energy models
     246           0 : double BAIES::getGauss() {
     247             :   double ene = 0.0;
     248             :   // loop over atoms pairs
     249           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     250             :   {
     251             :     std::vector<Vector> omp_der(atom_der_.size());
     252             :     #pragma omp for reduction( + : ene) nowait
     253             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     254             :       // get indexes
     255             :       unsigned ii = atom_pairs_[i].first;
     256             :       unsigned jj = atom_pairs_[i].second;
     257             :       // get distance
     258             :       Vector distance=delta(pos_[ii], pos_[jj]);
     259             :       const double value=distance.modulo();
     260             :       const double invvalue=1.0/value;
     261             :       // get energy
     262             :       double arg = ( value - mus_[i] ) / sigmas_[i];
     263             :       ene += arg * arg;
     264             :       // calculate derivatives
     265             :       double arg_der = kbt_ * arg / sigmas_[i];
     266             :       omp_der[ii] += - arg_der * invvalue * distance;
     267             :       omp_der[jj] +=   arg_der * invvalue * distance;
     268             :     }
     269             :     // gather from tasks
     270             :     #pragma omp critical
     271             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     272             :       atom_der_[i]+=omp_der[i];
     273             :     }
     274             :   }
     275             :   // missing Gaussian normalization constant
     276           0 :   return 0.5 * kbt_ * ene;
     277             : }
     278             : 
     279           0 : double BAIES::getGaussJeffreys() {
     280             :   double ene = 0.0;
     281             :   // loop over all atom pairs
     282           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     283             :   {
     284             :     std::vector<Vector> omp_der(atom_der_.size());
     285             :     #pragma omp for reduction( + : ene) nowait
     286             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     287             :       // get indexes
     288             :       unsigned ii = atom_pairs_[i].first;
     289             :       unsigned jj = atom_pairs_[i].second;
     290             :       // get distance
     291             :       Vector distance=delta(pos_[ii], pos_[jj]);
     292             :       const double value=distance.modulo();
     293             :       const double invvalue=1.0/value;
     294             :       // get energy
     295             :       double val_mu = value - mus_[i];
     296             :       double inv_val_mu = 1.0 / val_mu;
     297             :       double inv_sigma = 1.0 / sigmas_[i];
     298             :       double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
     299             :       double erfval = std::erf(erfcontent);
     300             :       // increase energy
     301             :       ene += -std::log( erfval * inv_val_mu );
     302             :       // calculate derivatives
     303             :       double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
     304             :       // increase derivatives
     305             :       omp_der[ii] += - arg_der * invvalue * distance;
     306             :       omp_der[jj] +=   arg_der * invvalue * distance;
     307             :     }
     308             :     // gather from tasks
     309             :     #pragma omp critical
     310             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     311             :       atom_der_[i]+=omp_der[i];
     312             :     }
     313             :   }
     314             :   // missing normalization constants
     315           0 :   return kbt_ * ene;
     316             : }
     317             : 
     318           0 : double BAIES::getGaussCauchy() {
     319             :   double ene = 0.0;
     320             :   // loop over all atom pairs
     321           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     322             :   {
     323             :     std::vector<Vector> omp_der(atom_der_.size());
     324             :     #pragma omp for reduction( + : ene) nowait
     325             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     326             :       // get indexes
     327             :       unsigned ii = atom_pairs_[i].first;
     328             :       unsigned jj = atom_pairs_[i].second;
     329             :       // get distance
     330             :       Vector distance=delta(pos_[ii], pos_[jj]);
     331             :       const double value=distance.modulo();
     332             :       const double invvalue=1.0/value;
     333             :       // get energy
     334             :       double val_mu = value - mus_[i];
     335             :       double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
     336             :       ene += std::log( lncontent );
     337             :       // calculate derivatives
     338             :       double arg_der =  kbt_ * val_mu / lncontent;
     339             :       omp_der[ii] += - arg_der * invvalue * distance;
     340             :       omp_der[jj] +=   arg_der * invvalue * distance;
     341             :     }
     342             :     // gather from tasks
     343             :     #pragma omp critical
     344             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     345             :       atom_der_[i]+=omp_der[i];
     346             :     }
     347             :   }
     348           0 :   return kbt_ * ene;
     349             : }
     350             : 
     351           0 : double BAIES::getLognorm() {
     352             :   double ene = 0.0;
     353             :   // loop over atoms pairs
     354           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     355             :   {
     356             :     std::vector<Vector> omp_der(atom_der_.size());
     357             :     #pragma omp for reduction( + : ene) nowait
     358             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     359             :       // get indexes
     360             :       unsigned ii = atom_pairs_[i].first;
     361             :       unsigned jj = atom_pairs_[i].second;
     362             :       // get distance
     363             :       Vector distance=delta(pos_[ii], pos_[jj]);
     364             :       const double value=distance.modulo();
     365             :       const double invvalue=1.0/value;
     366             :       // get energy
     367             :       double arg = ( std::log(value) - mus_[i] ) / sigmas_[i];
     368             :       ene += arg * arg;
     369             :       // calculate derivatives
     370             :       double arg_der = kbt_ * arg / (value * sigmas_[i]);
     371             :       omp_der[ii] += - arg_der * invvalue * distance;
     372             :       omp_der[jj] +=   arg_der * invvalue * distance;
     373             :     }
     374             :     // gather from tasks
     375             :     #pragma omp critical
     376             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     377             :       atom_der_[i]+=omp_der[i];
     378             :     }
     379             :   }
     380           0 :   return 0.5 * kbt_ * ene;
     381             : }
     382             : 
     383           0 : double BAIES::getLognormJeffreys() {
     384             :   double ene = 0.0;
     385             :   // loop over all atom pairs
     386           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     387             :   {
     388             :     std::vector<Vector> omp_der(atom_der_.size());
     389             :     #pragma omp for reduction( + : ene) nowait
     390             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     391             :       // get indexes
     392             :       unsigned ii = atom_pairs_[i].first;
     393             :       unsigned jj = atom_pairs_[i].second;
     394             :       // get distance
     395             :       Vector distance=delta(pos_[ii], pos_[jj]);
     396             :       const double value=distance.modulo();
     397             :       const double invvalue=1.0/value;
     398             :       // get energy
     399             :       double val_mu = std::log(value) - mus_[i];
     400             :       double inv_val_mu = 1 / val_mu;
     401             :       double inv_sigma = 1 / sigmas_[i];
     402             :       double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
     403             :       double erfval = std::erf(erfcontent);
     404             :       // increase energy
     405             :       ene += -std::log( erfval * inv_val_mu );
     406             :       // calculate derivatives
     407             :       double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
     408             :       // chain rule
     409             :       arg_der *= invvalue;
     410             :       // increase derivatives
     411             :       omp_der[ii] += - arg_der * invvalue * distance;
     412             :       omp_der[jj] +=   arg_der * invvalue * distance;
     413             :     }
     414             :     // gather from tasks
     415             :     #pragma omp critical
     416             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     417             :       atom_der_[i]+=omp_der[i];
     418             :     }
     419             :   }
     420             :   // missing constants
     421           0 :   return kbt_ * ene;
     422             : }
     423             : 
     424           0 : double BAIES::getLognormCauchy() {
     425             :   double ene = 0.0;
     426             :   // loop over all atom pairs
     427           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     428             :   {
     429             :     std::vector<Vector> omp_der(atom_der_.size());
     430             :     #pragma omp for reduction( + : ene) nowait
     431             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     432             :       // get indexes
     433             :       unsigned ii = atom_pairs_[i].first;
     434             :       unsigned jj = atom_pairs_[i].second;
     435             :       // get distance
     436             :       Vector distance=delta(pos_[ii], pos_[jj]);
     437             :       const double value=distance.modulo();
     438             :       const double invvalue=1.0/value;
     439             :       // get energy
     440             :       double val_mu = std::log(value) - mus_[i];
     441             :       double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
     442             :       ene += std::log( lncontent );
     443             :       // calculate derivatives
     444             :       double arg_der =  kbt_ * val_mu * invvalue / lncontent;
     445             :       omp_der[ii] += - arg_der * invvalue * distance;
     446             :       omp_der[jj] +=   arg_der * invvalue * distance;
     447             :     }
     448             :     // gather from tasks
     449             :     #pragma omp critical
     450             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     451             :       atom_der_[i]+=omp_der[i];
     452             :     }
     453             :   }
     454             :   // missing constants
     455           0 :   return kbt_ * ene;
     456             : }
     457             : 
     458           0 : void BAIES::calculate() {
     459             :   // fix PBCs
     460           0 :   if(pbc_) {
     461           0 :     makeWhole();
     462             :   }
     463             : 
     464             :   // get the positions at this step
     465           0 :   pos_ = getPositions();
     466             : 
     467             :   // reset derivatives
     468           0 :   #pragma omp parallel for num_threads(nt_)
     469             :   for(unsigned i=0; i<atom_der_.size(); ++i) {
     470             :     atom_der_[i] = Vector(0,0,0);
     471             :   }
     472             : 
     473             :   // calculate bAIes energy and derivatives
     474             :   double ene = 0.0;
     475             :   // Gaussian noise model
     476           0 :   if(mtype_=="gaussian") {
     477           0 :     switch(prior_) {
     478           0 :     case NONE:
     479           0 :       ene = getGauss();
     480             :       break;
     481           0 :     case JEFFREYS:
     482           0 :       ene = getGaussJeffreys();
     483             :       break;
     484           0 :     case CAUCHY:
     485           0 :       ene = getGaussCauchy();
     486             :       break;
     487             :     }
     488             :   } else {
     489           0 :     switch(prior_) {
     490           0 :     case NONE:
     491           0 :       ene = getLognorm();
     492             :       break;
     493           0 :     case JEFFREYS:
     494           0 :       ene = getLognormJeffreys();
     495             :       break;
     496           0 :     case CAUCHY:
     497           0 :       ene = getLognormCauchy();
     498             :       break;
     499             :     }
     500             :   }
     501             :   // set score
     502           0 :   Value* score = getPntrToComponent("ene");
     503             :   score->set(ene);
     504             :   // calculate virial
     505             :   Tensor virial;
     506             :   // declare omp reduction for Tensors
     507             :   #pragma omp declare reduction( sumTensor : Tensor : omp_out += omp_in )
     508             : 
     509           0 :   #pragma omp parallel for num_threads(nt_) reduction (sumTensor : virial)
     510             :   for(unsigned i=0; i<pos_.size(); ++i) {
     511             :     virial += Tensor(pos_[i], -atom_der_[i]);
     512             :   }
     513             :   // set virial
     514           0 :   setBoxDerivatives(score, virial);
     515             :   // set derivatives
     516           0 :   #pragma omp parallel for num_threads(nt_)
     517             :   for(unsigned i=0; i<atom_der_.size(); ++i) {
     518             :     setAtomsDerivatives(score, i, atom_der_[i]);
     519             :   }
     520           0 : }
     521             : 
     522             : }
     523             : }

Generated by: LCOV version 1.16