LCOV - code coverage report
Current view: top level - isdb - bAIes.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 9 123 7.3 %
Date: 2025-11-25 13:55:50 Functions: 1 11 9.1 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2024 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : 
      23             : #include "colvar/Colvar.h"
      24             : #include "core/ActionRegister.h"
      25             : #include "core/PlumedMain.h"
      26             : #include "tools/Communicator.h"
      27             : #include "tools/Matrix.h"
      28             : #include "core/GenericMolInfo.h"
      29             : #include "core/ActionSet.h"
      30             : #include "tools/File.h"
      31             : #include "tools/OpenMP.h"
      32             : #include <string>
      33             : #include <iostream>
      34             : #include <cmath>
      35             : #include <map>
      36             : #include <numeric>
      37             : #include <ctime>
      38             : #include "tools/Random.h"
      39             : 
      40             : #ifndef M_PI
      41             : #define M_PI           3.14159265358979323846
      42             : #endif
      43             : 
      44             : namespace PLMD {
      45             : namespace isdb {
      46             : 
      47             : //+PLUMEDOC ISDB_COLVAR BAIES
      48             : /*
      49             : Bayesian refinement of AF models.
      50             : 
      51             : This action implements the Bayesian approach to refine AF models introduced here and here.
      52             : It can be used to generate conformational ensembles of IDPs or refine AF models prior to small-molecule virtual screening.
      53             : 
      54             : \par Examples
      55             : ## Examples
      56             : 
      57             : Complete tutorials can be found <a href="https://github.com/COSBlab/bAIes-IDP">here</a> and here.
      58             : 
      59             : */
      60             : //+ENDPLUMEDOC
      61             : 
      62             : class BAIES : public Colvar {
      63             : 
      64             : private:
      65             : 
      66             : // bool pbc
      67             :   bool pbc_;
      68             : // temperature in kbt
      69             :   double kbt_;
      70             : // number of threads
      71             :   unsigned nt_;
      72             : // positions
      73             :   std::vector<Vector> pos_;
      74             : // derivatives
      75             :   std::vector<Vector> atom_der_;
      76             : // AF2 parameters variables
      77             :   std::string mtype_;
      78             :   std::vector < std::pair<unsigned, unsigned> > atom_pairs_;
      79             :   std::vector<double> mus_;
      80             :   std::vector<double> sigmas_;
      81             : // constants
      82             :   double inv_sqrt2_, sqrt2_pi_, inv_pi2_;
      83             : // prior type
      84             :   unsigned prior_;
      85             :   enum { NONE, JEFFREYS, CAUCHY };
      86             : // private functions
      87             :   void read_data_file(std::string datafile, std::vector<AtomNumber> atoms, double smin);
      88             : // energie
      89             :   double getGauss();
      90             :   double getGaussJeffreys();
      91             :   double getGaussCauchy();
      92             :   double getLognorm();
      93             :   double getLognormJeffreys();
      94             :   double getLognormCauchy();
      95             : 
      96             : public:
      97             :   static void registerKeywords( Keywords& keys );
      98             :   explicit BAIES(const ActionOptions&);
      99             : // active methods:
     100             :   void calculate() override;
     101             : };
     102             : 
     103             : PLUMED_REGISTER_ACTION(BAIES,"BAIES")
     104             : 
     105           2 : void BAIES::registerKeywords( Keywords& keys ) {
     106           2 :   Colvar::registerKeywords( keys );
     107           4 :   keys.add("atoms","ATOMS","atoms used in the calculation of bAIes energy");
     108           4 :   keys.add("compulsory","DATA_FILE","file with AF2 fit parameters");
     109           4 :   keys.add("compulsory","PRIOR", "type of prior to use (NONE, JEFFREYS, CAUCHY");
     110           4 :   keys.add("optional","TEMP", "temperature in kBt units");
     111           4 :   keys.add("optional","SIGMA_MIN", "minimum value of sigma");
     112           4 :   keys.addOutputComponent("ene","default","Bayesian bAIes energy");
     113           2 : }
     114             : 
     115           0 : BAIES::BAIES(const ActionOptions&ao):
     116             :   PLUMED_COLVAR_INIT(ao),
     117           0 :   pbc_(true) {
     118             :   // set constants
     119           0 :   inv_sqrt2_ = 1.0/sqrt(2.0);
     120           0 :   sqrt2_pi_  = sqrt(2.0 / M_PI);
     121           0 :   inv_pi2_   = 0.5 / M_PI / M_PI;
     122             : 
     123             :   // list of atoms
     124             :   std::vector<AtomNumber> atoms;
     125           0 :   parseAtomList("ATOMS", atoms);
     126             : 
     127             :   // file with AF2 fit parameters
     128             :   std::string datafile;
     129           0 :   parse("DATA_FILE", datafile);
     130             : 
     131             :   // prior type
     132             :   std::string prior;
     133           0 :   parse("PRIOR", prior);
     134             :   // check priors allowed
     135           0 :   if(prior=="NONE") {
     136           0 :     prior_ = NONE;
     137           0 :   } else if(prior=="JEFFREYS") {
     138           0 :     prior_ = JEFFREYS;
     139           0 :   } else if(prior=="CAUCHY") {
     140           0 :     prior_ = CAUCHY;
     141             :   } else {
     142           0 :     error("Unknown PRIOR type!");
     143             :   }
     144             : 
     145           0 :   bool nopbc=!pbc_;
     146           0 :   parseFlag("NOPBC",nopbc);
     147           0 :   pbc_=!nopbc;
     148             : 
     149             :   // set kbt
     150           0 :   kbt_ = plumed.getAtoms().getKbT();
     151           0 :   parse("TEMP", kbt_);
     152             : 
     153             :   // set sigma_min
     154           0 :   double sigma_min = -1.0;
     155           0 :   parse("SIGMA_MIN",sigma_min);
     156             : 
     157             :   // check read
     158           0 :   checkRead();
     159             : 
     160             :   // set parallel stuff
     161           0 :   unsigned mpisize=comm.Get_size();
     162           0 :   if(mpisize>1) {
     163           0 :     error("BAIES supports only OpenMP parallelization");
     164             :   }
     165             :   // set number of OpenMP threads
     166           0 :   nt_ = OpenMP::getNumThreads();
     167             : 
     168             :   // read the data file
     169           0 :   read_data_file(datafile, atoms, sigma_min);
     170             : 
     171             :   // print stuff to log
     172           0 :   log.printf("  number of atoms involved : %u\n", atoms.size());
     173           0 :   log.printf("  AF2 fit parameters file : %s\n", datafile.c_str());
     174           0 :   log.printf("  prior type : %s\n", prior.c_str());
     175             :   // print stuff from data file
     176           0 :   log.printf("  noise model type : %s\n", mtype_.c_str());
     177           0 :   log.printf("  number of pairs involved : %u\n", atom_pairs_.size());
     178             :   // other info
     179           0 :   log.printf("  temperature of the system in energy unit : %f\n", kbt_);
     180           0 :   log.printf("  minimum value of sigma : %f\n", sigma_min);
     181           0 :   if(pbc_) {
     182           0 :     log.printf("  using periodic boundary conditions\n");
     183             :   } else {
     184           0 :     log.printf("  without periodic boundary conditions\n");
     185             :   }
     186             : 
     187             :   // prepare other vectors: data and derivatives
     188           0 :   atom_der_.resize(atoms.size());
     189             : 
     190             :   // add components
     191           0 :   addComponentWithDerivatives("ene");
     192           0 :   componentIsNotPeriodic("ene");
     193             : 
     194             :   // request atoms
     195           0 :   requestAtoms(atoms);
     196             : 
     197             :   // print bibliography
     198           0 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     199           0 :   log<<plumed.cite("Schnapka, Morozova, Sen, Bonomi, bioRxiv (2025) doi: XXX");
     200           0 :   log<<"\n";
     201           0 : }
     202             : 
     203           0 : void BAIES::read_data_file(const std::string datafile, const std::vector<AtomNumber> atoms, double smin) {
     204             :   unsigned id, ai, aj;
     205             :   double mu, sigma;
     206             : // map serials to index in position array
     207             :   std::map< unsigned, unsigned > index;
     208           0 :   for(unsigned i=0; i<atoms.size(); ++i) {
     209           0 :     index[atoms[i].serial()]=i;
     210             :   }
     211             : // open file
     212           0 :   IFile *ifile = new IFile();
     213           0 :   if(ifile->FileExist(datafile)) {
     214           0 :     ifile->open(datafile);
     215             :     // read constant fields
     216           0 :     if( ifile->FieldExist("model") ) {
     217           0 :       ifile->scanField("model",mtype_);
     218           0 :       if( mtype_!="gaussian" && mtype_!="lognormal" ) {
     219           0 :         error("Unknown noise model type");
     220             :       }
     221             :     } else {
     222           0 :       error("Missing noise model type in DATA_FILE");
     223             :     }
     224             :     // read line-by-line
     225           0 :     while(ifile->scanField("Id",id)) {
     226           0 :       ifile->scanField("atom_i",ai);
     227           0 :       ifile->scanField("atom_j",aj);
     228           0 :       ifile->scanField("mu",mu);
     229           0 :       ifile->scanField("sigma",sigma);
     230             :       // list of pairs of atoms
     231           0 :       atom_pairs_.push_back(std::make_pair(index[ai],index[aj]));
     232             :       // list of mus
     233           0 :       mus_.push_back(mu);
     234             :       // list of sigmas
     235           0 :       sigmas_.push_back(std::max(smin,sigma));
     236             :       // new line
     237           0 :       ifile->scanField();
     238             :     }
     239           0 :     ifile->close();
     240             :   } else {
     241           0 :     error("Cannot find DATA_FILE "+datafile+"\n");
     242             :   }
     243           0 :   delete ifile;
     244           0 : }
     245             : 
     246             : // Energy models
     247           0 : double BAIES::getGauss() {
     248             :   double ene = 0.0;
     249             :   // loop over atoms pairs
     250           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     251             :   {
     252             :     std::vector<Vector> omp_der(atom_der_.size());
     253             :     #pragma omp for reduction( + : ene) nowait
     254             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     255             :       // get indexes
     256             :       unsigned ii = atom_pairs_[i].first;
     257             :       unsigned jj = atom_pairs_[i].second;
     258             :       // get distance
     259             :       Vector distance=delta(pos_[ii], pos_[jj]);
     260             :       const double value=distance.modulo();
     261             :       const double invvalue=1.0/value;
     262             :       // get energy
     263             :       double arg = ( value - mus_[i] ) / sigmas_[i];
     264             :       ene += arg * arg;
     265             :       // calculate derivatives
     266             :       double arg_der = kbt_ * arg / sigmas_[i];
     267             :       omp_der[ii] += - arg_der * invvalue * distance;
     268             :       omp_der[jj] +=   arg_der * invvalue * distance;
     269             :     }
     270             :     // gather from tasks
     271             :     #pragma omp critical
     272             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     273             :       atom_der_[i]+=omp_der[i];
     274             :     }
     275             :   }
     276             :   // missing Gaussian normalization constant
     277           0 :   return 0.5 * kbt_ * ene;
     278             : }
     279             : 
     280           0 : double BAIES::getGaussJeffreys() {
     281             :   double ene = 0.0;
     282             :   // loop over all atom pairs
     283           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     284             :   {
     285             :     std::vector<Vector> omp_der(atom_der_.size());
     286             :     #pragma omp for reduction( + : ene) nowait
     287             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     288             :       // get indexes
     289             :       unsigned ii = atom_pairs_[i].first;
     290             :       unsigned jj = atom_pairs_[i].second;
     291             :       // get distance
     292             :       Vector distance=delta(pos_[ii], pos_[jj]);
     293             :       const double value=distance.modulo();
     294             :       const double invvalue=1.0/value;
     295             :       // get energy
     296             :       double val_mu = value - mus_[i];
     297             :       double inv_val_mu = 1.0 / val_mu;
     298             :       double inv_sigma = 1.0 / sigmas_[i];
     299             :       double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
     300             :       double erfval = std::erf(erfcontent);
     301             :       // increase energy
     302             :       ene += -std::log( erfval * inv_val_mu );
     303             :       // calculate derivatives
     304             :       double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
     305             :       // increase derivatives
     306             :       omp_der[ii] += - arg_der * invvalue * distance;
     307             :       omp_der[jj] +=   arg_der * invvalue * distance;
     308             :     }
     309             :     // gather from tasks
     310             :     #pragma omp critical
     311             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     312             :       atom_der_[i]+=omp_der[i];
     313             :     }
     314             :   }
     315             :   // missing normalization constants
     316           0 :   return kbt_ * ene;
     317             : }
     318             : 
     319           0 : double BAIES::getGaussCauchy() {
     320             :   double ene = 0.0;
     321             :   // loop over all atom pairs
     322           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     323             :   {
     324             :     std::vector<Vector> omp_der(atom_der_.size());
     325             :     #pragma omp for reduction( + : ene) nowait
     326             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     327             :       // get indexes
     328             :       unsigned ii = atom_pairs_[i].first;
     329             :       unsigned jj = atom_pairs_[i].second;
     330             :       // get distance
     331             :       Vector distance=delta(pos_[ii], pos_[jj]);
     332             :       const double value=distance.modulo();
     333             :       const double invvalue=1.0/value;
     334             :       // get energy
     335             :       double val_mu = value - mus_[i];
     336             :       double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
     337             :       ene += std::log( lncontent );
     338             :       // calculate derivatives
     339             :       double arg_der =  kbt_ * val_mu / lncontent;
     340             :       omp_der[ii] += - arg_der * invvalue * distance;
     341             :       omp_der[jj] +=   arg_der * invvalue * distance;
     342             :     }
     343             :     // gather from tasks
     344             :     #pragma omp critical
     345             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     346             :       atom_der_[i]+=omp_der[i];
     347             :     }
     348             :   }
     349           0 :   return kbt_ * ene;
     350             : }
     351             : 
     352           0 : double BAIES::getLognorm() {
     353             :   double ene = 0.0;
     354             :   // loop over atoms pairs
     355           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     356             :   {
     357             :     std::vector<Vector> omp_der(atom_der_.size());
     358             :     #pragma omp for reduction( + : ene) nowait
     359             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     360             :       // get indexes
     361             :       unsigned ii = atom_pairs_[i].first;
     362             :       unsigned jj = atom_pairs_[i].second;
     363             :       // get distance
     364             :       Vector distance=delta(pos_[ii], pos_[jj]);
     365             :       const double value=distance.modulo();
     366             :       const double invvalue=1.0/value;
     367             :       // get energy
     368             :       double arg = ( std::log(value) - mus_[i] ) / sigmas_[i];
     369             :       ene += arg * arg;
     370             :       // calculate derivatives
     371             :       double arg_der = kbt_ * arg / (value * sigmas_[i]);
     372             :       omp_der[ii] += - arg_der * invvalue * distance;
     373             :       omp_der[jj] +=   arg_der * invvalue * distance;
     374             :     }
     375             :     // gather from tasks
     376             :     #pragma omp critical
     377             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     378             :       atom_der_[i]+=omp_der[i];
     379             :     }
     380             :   }
     381           0 :   return 0.5 * kbt_ * ene;
     382             : }
     383             : 
     384           0 : double BAIES::getLognormJeffreys() {
     385             :   double ene = 0.0;
     386             :   // loop over all atom pairs
     387           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     388             :   {
     389             :     std::vector<Vector> omp_der(atom_der_.size());
     390             :     #pragma omp for reduction( + : ene) nowait
     391             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     392             :       // get indexes
     393             :       unsigned ii = atom_pairs_[i].first;
     394             :       unsigned jj = atom_pairs_[i].second;
     395             :       // get distance
     396             :       Vector distance=delta(pos_[ii], pos_[jj]);
     397             :       const double value=distance.modulo();
     398             :       const double invvalue=1.0/value;
     399             :       // get energy
     400             :       double val_mu = std::log(value) - mus_[i];
     401             :       double inv_val_mu = 1 / val_mu;
     402             :       double inv_sigma = 1 / sigmas_[i];
     403             :       double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
     404             :       double erfval = std::erf(erfcontent);
     405             :       // increase energy
     406             :       ene += -std::log( erfval * inv_val_mu );
     407             :       // calculate derivatives
     408             :       double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
     409             :       // chain rule
     410             :       arg_der *= invvalue;
     411             :       // increase derivatives
     412             :       omp_der[ii] += - arg_der * invvalue * distance;
     413             :       omp_der[jj] +=   arg_der * invvalue * distance;
     414             :     }
     415             :     // gather from tasks
     416             :     #pragma omp critical
     417             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     418             :       atom_der_[i]+=omp_der[i];
     419             :     }
     420             :   }
     421             :   // missing constants
     422           0 :   return kbt_ * ene;
     423             : }
     424             : 
     425           0 : double BAIES::getLognormCauchy() {
     426             :   double ene = 0.0;
     427             :   // loop over all atom pairs
     428           0 :   #pragma omp parallel num_threads(nt_) shared(ene)
     429             :   {
     430             :     std::vector<Vector> omp_der(atom_der_.size());
     431             :     #pragma omp for reduction( + : ene) nowait
     432             :     for(unsigned i=0; i<atom_pairs_.size(); ++i) {
     433             :       // get indexes
     434             :       unsigned ii = atom_pairs_[i].first;
     435             :       unsigned jj = atom_pairs_[i].second;
     436             :       // get distance
     437             :       Vector distance=delta(pos_[ii], pos_[jj]);
     438             :       const double value=distance.modulo();
     439             :       const double invvalue=1.0/value;
     440             :       // get energy
     441             :       double val_mu = std::log(value) - mus_[i];
     442             :       double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
     443             :       ene += std::log( lncontent );
     444             :       // calculate derivatives
     445             :       double arg_der =  kbt_ * val_mu * invvalue / lncontent;
     446             :       omp_der[ii] += - arg_der * invvalue * distance;
     447             :       omp_der[jj] +=   arg_der * invvalue * distance;
     448             :     }
     449             :     // gather from tasks
     450             :     #pragma omp critical
     451             :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     452             :       atom_der_[i]+=omp_der[i];
     453             :     }
     454             :   }
     455             :   // missing constants
     456           0 :   return kbt_ * ene;
     457             : }
     458             : 
     459           0 : void BAIES::calculate() {
     460             :   // fix PBCs
     461           0 :   if(pbc_) {
     462           0 :     makeWhole();
     463             :   }
     464             : 
     465             :   // get the positions at this step
     466           0 :   pos_ = getPositions();
     467             : 
     468             :   // reset derivatives
     469           0 :   #pragma omp parallel for num_threads(nt_)
     470             :   for(unsigned i=0; i<atom_der_.size(); ++i) {
     471             :     atom_der_[i] = Vector(0,0,0);
     472             :   }
     473             : 
     474             :   // calculate bAIes energy and derivatives
     475             :   double ene = 0.0;
     476             :   // Gaussian noise model
     477           0 :   if(mtype_=="gaussian") {
     478           0 :     switch(prior_) {
     479           0 :     case NONE:
     480           0 :       ene = getGauss();
     481             :       break;
     482           0 :     case JEFFREYS:
     483           0 :       ene = getGaussJeffreys();
     484             :       break;
     485           0 :     case CAUCHY:
     486           0 :       ene = getGaussCauchy();
     487             :       break;
     488             :     }
     489             :   } else {
     490           0 :     switch(prior_) {
     491           0 :     case NONE:
     492           0 :       ene = getLognorm();
     493             :       break;
     494           0 :     case JEFFREYS:
     495           0 :       ene = getLognormJeffreys();
     496             :       break;
     497           0 :     case CAUCHY:
     498           0 :       ene = getLognormCauchy();
     499             :       break;
     500             :     }
     501             :   }
     502             :   // set score
     503           0 :   Value* score = getPntrToComponent("ene");
     504             :   score->set(ene);
     505             :   // calculate virial
     506           0 :   Tensor virial;
     507             :   // declare omp reduction for Tensors
     508             :   #pragma omp declare reduction( sumTensor : Tensor : omp_out += omp_in )
     509             : 
     510           0 :   #pragma omp parallel for num_threads(nt_) reduction (sumTensor : virial)
     511             :   for(unsigned i=0; i<pos_.size(); ++i) {
     512             :     virial += Tensor(pos_[i], -atom_der_[i]);
     513             :   }
     514             :   // set virial
     515           0 :   setBoxDerivatives(score, virial);
     516             :   // set derivatives
     517           0 :   #pragma omp parallel for num_threads(nt_)
     518             :   for(unsigned i=0; i<atom_der_.size(); ++i) {
     519             :     setAtomsDerivatives(score, i, atom_der_[i]);
     520             :   }
     521           0 : }
     522             : 
     523             : }
     524             : }

Generated by: LCOV version 1.16