LCOV - code coverage report
Current view: top level - isdb - EMMI.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 668 878 76.1 %
Date: 2026-03-30 13:16:06 Functions: 34 42 81.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2017-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "core/ActionWithValue.h"
      23             : #include "core/ActionAtomistic.h"
      24             : #include "core/ActionWithArguments.h"
      25             : #include "core/ActionRegister.h"
      26             : #include "core/PlumedMain.h"
      27             : #include "tools/Matrix.h"
      28             : #include "core/GenericMolInfo.h"
      29             : #include "core/ActionSet.h"
      30             : #include "tools/File.h"
      31             : #include "tools/Random.h"
      32             : 
      33             : #include <string>
      34             : #include <map>
      35             : #include <numeric>
      36             : #include <ctime>
      37             : 
      38             : namespace PLMD {
      39             : namespace isdb {
      40             : 
      41             : //+PLUMEDOC ISDB_COLVAR EMMI
      42             : /*
      43             : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
      44             : 
      45             : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in  Ref. \cite Hanot113951 .
      46             : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
      47             : 
      48             : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
      49             : GMM_FILE. We are currently working on a web server to perform
      50             : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
      51             : 
      52             : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
      53             : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
      54             : the Metainference approach \cite Bonomi:2016ip .
      55             : 
      56             : \warning
      57             : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
      58             : 
      59             : \note
      60             : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
      61             : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
      62             : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
      63             : 
      64             : \note
      65             : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
      66             : add the NOPBC flag to the input line
      67             : 
      68             : \par Examples
      69             : 
      70             : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
      71             : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
      72             : 
      73             : \plumedfile
      74             : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
      75             :      0  2.9993805e+01   6.54628 10.37820 -0.92988  2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02  1
      76             :      1  2.3468312e+01   6.56095 10.34790 -0.87808  1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02  1
      77             :      ...
      78             : \endplumedfile
      79             : 
      80             : To accelerate the computation of the Bayesian score, one can:
      81             : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
      82             : - calculate the restraint every other step (or more).
      83             : 
      84             : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
      85             : using a GROMACS index file.
      86             : 
      87             : The input file looks as follows:
      88             : 
      89             : \plumedfile
      90             : # include pdb info
      91             : MOLINFO STRUCTURE=prot.pdb
      92             : 
      93             : #  all heavy atoms
      94             : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
      95             : 
      96             : # create EMMI score
      97             : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
      98             : 
      99             : # translate into bias - apply every 2 steps
     100             : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
     101             : 
     102             : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
     103             : \endplumedfile
     104             : 
     105             : 
     106             : */
     107             : //+ENDPLUMEDOC
     108             : 
     109             : class EMMI :
     110             :   public ActionAtomistic,
     111             :   public ActionWithArguments,
     112             :   public ActionWithValue {
     113             : private:
     114             : 
     115             : // temperature in kbt
     116             :   double kbt_;
     117             : // model GMM - atom types
     118             :   std::vector<unsigned> GMM_m_type_;
     119             : // model GMM - list of atom sigmas - one per atom type
     120             :   std::vector<double> GMM_m_s_;
     121             : // model GMM - list of atom weights - one per atom type
     122             :   std::vector<double> GMM_m_w_;
     123             : // data GMM - means, weights, and covariances + beta option
     124             :   std::vector<Vector>             GMM_d_m_;
     125             :   std::vector<double>             GMM_d_w_;
     126             :   std::vector< VectorGeneric<6> > GMM_d_cov_;
     127             :   std::vector<int>                GMM_d_beta_;
     128             :   std::vector < std::vector<int> >     GMM_d_grps_;
     129             : // overlaps
     130             :   std::vector<double> ovmd_;
     131             :   std::vector<double> ovdd_;
     132             :   std::vector<double> ovmd_ave_;
     133             : // and derivatives
     134             :   std::vector<Vector> ovmd_der_;
     135             :   std::vector<Vector> atom_der_;
     136             :   std::vector<double> GMMid_der_;
     137             : // constants
     138             :   double cfact_;
     139             :   double inv_sqrt2_, sqrt2_pi_;
     140             : // metainference
     141             :   unsigned nrep_;
     142             :   unsigned replica_;
     143             :   std::vector<double> sigma_;
     144             :   std::vector<double> sigma_min_;
     145             :   std::vector<double> sigma_max_;
     146             :   std::vector<double> dsigma_;
     147             : // list of prefactors for overlap between two Gaussians
     148             : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
     149             :   std::vector<double> pre_fact_;
     150             : // inverse of the sum of model and data covariances matrices
     151             :   std::vector< VectorGeneric<6> > inv_cov_md_;
     152             : // neighbor list
     153             :   double   nl_cutoff_;
     154             :   unsigned nl_stride_;
     155             :   bool first_time_;
     156             :   bool no_aver_;
     157             :   std::vector<unsigned> nl_;
     158             : // parallel stuff
     159             :   unsigned size_;
     160             :   unsigned rank_;
     161             : // pbc
     162             :   bool pbc_;
     163             : // Monte Carlo stuff
     164             :   int      MCstride_;
     165             :   double   MCaccept_;
     166             :   double   MCtrials_;
     167             :   Random   random_;
     168             :   // status stuff
     169             :   unsigned int statusstride_;
     170             :   std::string       statusfilename_;
     171             :   OFile        statusfile_;
     172             :   bool         first_status_;
     173             :   // regression
     174             :   unsigned nregres_;
     175             :   double scale_;
     176             :   double scale_min_;
     177             :   double scale_max_;
     178             :   double dscale_;
     179             :   // tabulated exponential
     180             :   double dpcutoff_;
     181             :   double dexp_;
     182             :   unsigned nexp_;
     183             :   std::vector<double> tab_exp_;
     184             :   // simulated annealing
     185             :   unsigned nanneal_;
     186             :   double   kanneal_;
     187             :   double   anneal_;
     188             :   // prior exponent
     189             :   double prior_;
     190             :   // noise type
     191             :   unsigned noise_;
     192             :   // total score and virial;
     193             :   double ene_;
     194             :   Tensor virial_;
     195             :   // model overlap file
     196             :   unsigned int ovstride_;
     197             :   std::string       ovfilename_;
     198             : 
     199             :   // Reweighting additions
     200             :   bool do_reweight_;
     201             :   bool first_time_w_;
     202             :   std::vector<double> forces;
     203             :   std::vector<double> forcesToApply;
     204             : 
     205             :   // average weights
     206             :   double decay_w_;
     207             :   std::vector<double> average_weights_;
     208             : 
     209             : // write file with model overlap
     210             :   void write_model_overlap(long long int step);
     211             : // get median of std::vector
     212             :   double get_median(std::vector<double> &v);
     213             : // annealing
     214             :   double get_annealing(long long int step);
     215             : // do regression
     216             :   double scaleEnergy(double s);
     217             :   double doRegression();
     218             : // read and write status
     219             :   void read_status();
     220             :   void print_status(long long int step);
     221             : // accept or reject
     222             :   bool doAccept(double oldE, double newE, double kbt);
     223             : // do MonteCarlo
     224             :   void doMonteCarlo();
     225             : // read error file
     226             :   std::vector<double> read_exp_errors(const std::string & errfile);
     227             : // read experimental overlaps
     228             :   std::vector<double> read_exp_overlaps(const std::string & ovfile);
     229             : // calculate model GMM weights and covariances
     230             :   std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
     231             : // read data GMM file
     232             :   void get_GMM_d(std::string gmm_file);
     233             : // check GMM data
     234             :   void check_GMM_d(const VectorGeneric<6> &cov, double w);
     235             : // auxiliary method
     236             :   void calculate_useful_stuff(double reso);
     237             : // get pref_fact and inv_cov_md
     238             :   double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
     239             :                                 double GMM_w_0, double GMM_w_1,
     240             :                                 VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
     241             : // calculate self overlaps between data GMM components - ovdd_
     242             :   double get_self_overlap(unsigned id);
     243             : // calculate overlap between two Gaussians
     244             :   double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
     245             :                      const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
     246             : // calculate exponent of overlap for neighbor list update
     247             :   double get_exp_overlap(const Vector &m_m, const Vector &d_m,
     248             :                          const VectorGeneric<6> &inv_cov_md);
     249             : // update the neighbor list
     250             :   void update_neighbor_list();
     251             : // calculate overlap
     252             :   void calculate_overlap();
     253             : // Gaussian noise
     254             :   void calculate_Gauss();
     255             : // Outliers noise
     256             :   void calculate_Outliers();
     257             : // Marginal noise
     258             :   void calculate_Marginal();
     259             : 
     260             :   // See MetainferenceBase
     261             :   void get_weights(double &weight, double &norm, double &neff);
     262             : 
     263             : public:
     264             :   static void registerKeywords( Keywords& keys );
     265             :   explicit EMMI(const ActionOptions&);
     266             :   // needed for reweighting
     267             :   void setDerivatives();
     268             :   void turnOnDerivatives() override;
     269             :   unsigned getNumberOfDerivatives() override;
     270             :   void lockRequests() override;
     271             :   void unlockRequests() override;
     272             :   void calculateNumericalDerivatives( ActionWithValue* a ) override;
     273             :   void apply() override;
     274             :   void setArgDerivatives(Value *v, const double &d);
     275             :   void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
     276             :   void setBoxDerivatives(Value*v, const Tensor&d);
     277             : // active methods:
     278             :   void prepare() override;
     279             :   void calculate() override;
     280             : };
     281             : 
     282             : inline
     283          30 : void EMMI::setDerivatives() {
     284             :   // Get appropriate number of derivatives
     285             :   // Derivatives are first for arguments and then for atoms
     286             :   unsigned nder;
     287          30 :   if( getNumberOfAtoms()>0 ) {
     288          30 :     nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     289             :   } else {
     290           0 :     nder = getNumberOfArguments();
     291             :   }
     292             : 
     293             :   // Resize all derivative arrays
     294          30 :   forces.resize( nder );
     295          30 :   forcesToApply.resize( nder );
     296         146 :   for(int i=0; i<getNumberOfComponents(); ++i) {
     297         116 :     getPntrToComponent(i)->resizeDerivatives(nder);
     298             :   }
     299          30 : }
     300             : 
     301             : inline
     302          22 : void EMMI::turnOnDerivatives() {
     303          22 :   ActionWithValue::turnOnDerivatives();
     304          22 : }
     305             : 
     306             : inline
     307          80 : unsigned EMMI::getNumberOfDerivatives() {
     308          80 :   if( getNumberOfAtoms()>0 ) {
     309          80 :     return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     310             :   }
     311           0 :   return getNumberOfArguments();
     312             : }
     313             : 
     314             : inline
     315          70 : void EMMI::lockRequests() {
     316             :   ActionAtomistic::lockRequests();
     317             :   ActionWithArguments::lockRequests();
     318          70 : }
     319             : 
     320             : inline
     321          70 : void EMMI::unlockRequests() {
     322             :   ActionAtomistic::unlockRequests();
     323             :   ActionWithArguments::unlockRequests();
     324          70 : }
     325             : 
     326             : inline
     327          10 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
     328          10 :   if( getNumberOfArguments()>0 ) {
     329           0 :     ActionWithArguments::calculateNumericalDerivatives( a );
     330             :   }
     331          10 :   if( getNumberOfAtoms()>0 ) {
     332          10 :     Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
     333          44 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     334          34 :       for(unsigned i=0; i<getNumberOfArguments(); ++i)
     335           0 :         if(getPntrToComponent(j)->hasDerivatives()) {
     336           0 :           save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
     337             :         }
     338             :     }
     339          10 :     calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
     340          44 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     341          34 :       for(unsigned i=0; i<getNumberOfArguments(); ++i)
     342           0 :         if(getPntrToComponent(j)->hasDerivatives()) {
     343           0 :           getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
     344             :         }
     345             :     }
     346             :   }
     347          10 : }
     348             : 
     349             : inline
     350          70 : void EMMI::apply() {
     351             :   bool wasforced=false;
     352          70 :   forcesToApply.assign(forcesToApply.size(),0.0);
     353         382 :   for(int i=0; i<getNumberOfComponents(); ++i) {
     354         312 :     if( getPntrToComponent(i)->applyForce( forces ) ) {
     355             :       wasforced=true;
     356           0 :       for(unsigned i=0; i<forces.size(); ++i) {
     357           0 :         forcesToApply[i]+=forces[i];
     358             :       }
     359             :     }
     360             :   }
     361          70 :   if( wasforced ) {
     362           0 :     addForcesOnArguments( forcesToApply );
     363           0 :     if( getNumberOfAtoms()>0 ) {
     364           0 :       setForcesOnAtoms( forcesToApply, getNumberOfArguments() );
     365             :     }
     366             :   }
     367          70 : }
     368             : 
     369             : inline
     370             : void EMMI::setArgDerivatives(Value *v, const double &d) {
     371             :   v->addDerivative(0,d);
     372             : }
     373             : 
     374             : inline
     375    10961336 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
     376    10961336 :   const unsigned noa=getNumberOfArguments();
     377    10961336 :   v->addDerivative(noa+3*i+0,d[0]);
     378    10961336 :   v->addDerivative(noa+3*i+1,d[1]);
     379    10961336 :   v->addDerivative(noa+3*i+2,d[2]);
     380    10961336 : }
     381             : 
     382             : inline
     383       18220 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
     384       18220 :   const unsigned noa=getNumberOfArguments();
     385             :   const unsigned nat=getNumberOfAtoms();
     386       18220 :   v->addDerivative(noa+3*nat+0,d(0,0));
     387       18220 :   v->addDerivative(noa+3*nat+1,d(0,1));
     388       18220 :   v->addDerivative(noa+3*nat+2,d(0,2));
     389       18220 :   v->addDerivative(noa+3*nat+3,d(1,0));
     390       18220 :   v->addDerivative(noa+3*nat+4,d(1,1));
     391       18220 :   v->addDerivative(noa+3*nat+5,d(1,2));
     392       18220 :   v->addDerivative(noa+3*nat+6,d(2,0));
     393       18220 :   v->addDerivative(noa+3*nat+7,d(2,1));
     394       18220 :   v->addDerivative(noa+3*nat+8,d(2,2));
     395       18220 : }
     396             : 
     397       13845 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
     398             : 
     399          34 : void EMMI::registerKeywords( Keywords& keys ) {
     400          34 :   Action::registerKeywords( keys );
     401          34 :   ActionAtomistic::registerKeywords( keys );
     402          34 :   ActionWithValue::registerKeywords( keys );
     403          34 :   ActionWithArguments::registerKeywords( keys );
     404          34 :   keys.use("ARG");
     405          68 :   keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
     406          68 :   keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
     407          68 :   keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
     408          68 :   keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
     409          68 :   keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
     410          68 :   keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
     411          68 :   keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
     412          68 :   keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
     413          68 :   keys.add("optional","SIGMA0","initial value of the uncertainty");
     414          68 :   keys.add("optional","DSIGMA","MC step for uncertainties");
     415          68 :   keys.add("optional","MC_STRIDE", "Monte Carlo stride");
     416          68 :   keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
     417          68 :   keys.add("optional","OV_FILE","file with experimental overlaps");
     418          68 :   keys.add("optional","NORM_DENSITY","integral of the experimental density");
     419          68 :   keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
     420          68 :   keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
     421          68 :   keys.add("optional","REGRESSION","regression stride");
     422          68 :   keys.add("optional","REG_SCALE_MIN","regression minimum scale");
     423          68 :   keys.add("optional","REG_SCALE_MAX","regression maximum scale");
     424          68 :   keys.add("optional","REG_DSCALE","regression maximum scale MC move");
     425          68 :   keys.add("optional","SCALE","scale factor");
     426          68 :   keys.add("optional","ANNEAL", "Length of annealing cycle");
     427          68 :   keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
     428          68 :   keys.add("optional","TEMP","temperature");
     429          68 :   keys.add("optional","PRIOR", "exponent of uncertainty prior");
     430          68 :   keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
     431          68 :   keys.add("optional","WRITE_OV","write a file with model overlaps");
     432          68 :   keys.add("optional","AVERAGING", "Averaging window for weights");
     433          68 :   keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
     434          68 :   keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
     435          34 :   componentsAreNotOptional(keys);
     436          68 :   keys.addOutputComponent("scoreb","default","Bayesian score");
     437          68 :   keys.addOutputComponent("acc",   "NOISETYPE","MC acceptance for uncertainty");
     438          68 :   keys.addOutputComponent("scale", "REGRESSION","scale factor");
     439          68 :   keys.addOutputComponent("accscale", "REGRESSION","MC acceptance for scale regression");
     440          68 :   keys.addOutputComponent("enescale", "REGRESSION","MC energy for scale regression");
     441          68 :   keys.addOutputComponent("anneal","ANNEAL","annealing factor");
     442          68 :   keys.addOutputComponent("weight",       "REWEIGHT",     "weights of the weighted average");
     443          68 :   keys.addOutputComponent("biasDer",      "REWEIGHT",     "derivatives with respect to the bias");
     444          68 :   keys.addOutputComponent("sigma",      "NOISETYPE",     "uncertainty in the forward models and experiment");
     445          68 :   keys.addOutputComponent("neff",         "default",      "effective number of replicas");
     446          34 : }
     447             : 
     448          30 : EMMI::EMMI(const ActionOptions&ao):
     449             :   Action(ao),
     450             :   ActionAtomistic(ao),
     451             :   ActionWithArguments(ao),
     452             :   ActionWithValue(ao),
     453          30 :   inv_sqrt2_(0.707106781186548),
     454          30 :   sqrt2_pi_(0.797884560802865),
     455          30 :   first_time_(true), no_aver_(false), pbc_(true),
     456          30 :   MCstride_(1), MCaccept_(0.), MCtrials_(0.),
     457          60 :   statusstride_(0), first_status_(true),
     458          30 :   nregres_(0), scale_(1.),
     459          30 :   dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
     460          60 :   kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
     461          30 :   do_reweight_(false), first_time_w_(true), decay_w_(1.) {
     462             :   // periodic boundary conditions
     463          30 :   bool nopbc=!pbc_;
     464          30 :   parseFlag("NOPBC",nopbc);
     465          30 :   pbc_=!nopbc;
     466             : 
     467             :   // list of atoms
     468             :   std::vector<AtomNumber> atoms;
     469          60 :   parseAtomList("ATOMS", atoms);
     470             : 
     471             :   // file with data GMM
     472             :   std::string GMM_file;
     473          60 :   parse("GMM_FILE", GMM_file);
     474             : 
     475             :   // type of data noise
     476             :   std::string noise;
     477          60 :   parse("NOISETYPE",noise);
     478          30 :   if      (noise=="GAUSS") {
     479          18 :     noise_ = 0;
     480          12 :   } else if(noise=="OUTLIERS") {
     481           6 :     noise_ = 1;
     482           6 :   } else if(noise=="MARGINAL") {
     483           6 :     noise_ = 2;
     484             :   } else {
     485           0 :     error("Unknown noise type!");
     486             :   }
     487             : 
     488             :   // minimum value for error
     489             :   double sigma_min;
     490          30 :   parse("SIGMA_MIN", sigma_min);
     491          30 :   if(sigma_min<0) {
     492           0 :     error("SIGMA_MIN should be greater or equal to zero");
     493             :   }
     494             : 
     495             :   // the following parameters must be specified with noise type 0 and 1
     496             :   double sigma_ini, dsigma;
     497          30 :   if(noise_!=2) {
     498             :     // initial value of the uncertainty
     499          24 :     parse("SIGMA0", sigma_ini);
     500          24 :     if(sigma_ini<=0) {
     501           0 :       error("you must specify a positive SIGMA0");
     502             :     }
     503             :     // MC parameters
     504          24 :     parse("DSIGMA", dsigma);
     505          24 :     if(dsigma<0) {
     506           0 :       error("you must specify a positive DSIGMA");
     507             :     }
     508          24 :     parse("MC_STRIDE", MCstride_);
     509          24 :     if(dsigma>0 && MCstride_<=0) {
     510           0 :       error("you must specify a positive MC_STRIDE");
     511             :     }
     512             :     // status file parameters
     513          24 :     parse("WRITE_STRIDE", statusstride_);
     514          24 :     if(statusstride_==0) {
     515           0 :       error("you must specify a positive WRITE_STRIDE");
     516             :     }
     517          48 :     parse("STATUS_FILE",  statusfilename_);
     518          24 :     if(statusfilename_=="") {
     519          48 :       statusfilename_ = "MISTATUS"+getLabel();
     520             :     } else {
     521           0 :       statusfilename_ = statusfilename_+getLabel();
     522             :     }
     523             :   }
     524             : 
     525             :   // error file
     526             :   std::string errfile;
     527          60 :   parse("ERR_FILE", errfile);
     528             : 
     529             :   // file with experimental overlaps
     530             :   std::string ovfile;
     531          30 :   parse("OV_FILE", ovfile);
     532             : 
     533             :   // integral of the experimetal density
     534          30 :   double norm_d = 0.0;
     535          30 :   parse("NORM_DENSITY", norm_d);
     536             : 
     537             :   // temperature
     538          30 :   double temp=0.0;
     539          30 :   parse("TEMP",temp);
     540             :   // convert temp to kbt
     541          30 :   if(temp>0.0) {
     542          28 :     kbt_=plumed.getAtoms().getKBoltzmann()*temp;
     543             :   } else {
     544           2 :     kbt_=plumed.getAtoms().getKbT();
     545             :   }
     546             : 
     547             :   // exponent of uncertainty prior
     548          30 :   parse("PRIOR",prior_);
     549             : 
     550             :   // simulated annealing stuff
     551          30 :   parse("ANNEAL", nanneal_);
     552          30 :   parse("ANNEAL_FACT", kanneal_);
     553          30 :   if(nanneal_>0 && kanneal_<=1.0) {
     554           0 :     error("with ANNEAL, ANNEAL_FACT must be greater than 1");
     555             :   }
     556             : 
     557             :   // regression stride
     558          30 :   parse("REGRESSION",nregres_);
     559             :   // other regression parameters
     560          30 :   if(nregres_>0) {
     561           0 :     parse("REG_SCALE_MIN",scale_min_);
     562           0 :     parse("REG_SCALE_MAX",scale_max_);
     563           0 :     parse("REG_DSCALE",dscale_);
     564             :     // checks
     565           0 :     if(scale_max_<=scale_min_) {
     566           0 :       error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
     567             :     }
     568           0 :     if(dscale_<=0.) {
     569           0 :       error("with REGRESSION, REG_DSCALE must be positive");
     570             :     }
     571             :   }
     572             : 
     573             :   // scale factor
     574          30 :   parse("SCALE", scale_);
     575             : 
     576             :   // read map resolution
     577             :   double reso;
     578          30 :   parse("RESOLUTION", reso);
     579          30 :   if(reso<=0.) {
     580           0 :     error("RESOLUTION should be strictly positive");
     581             :   }
     582             : 
     583             :   // neighbor list stuff
     584          30 :   parse("NL_CUTOFF",nl_cutoff_);
     585          30 :   if(nl_cutoff_<=0.0) {
     586           0 :     error("NL_CUTOFF should be explicitly specified and positive");
     587             :   }
     588          30 :   parse("NL_STRIDE",nl_stride_);
     589          30 :   if(nl_stride_==0) {
     590           0 :     error("NL_STRIDE should be explicitly specified and positive");
     591             :   }
     592             : 
     593             :   // averaging or not
     594          30 :   parseFlag("NO_AVER",no_aver_);
     595             : 
     596             :   // write overlap file
     597          30 :   parse("WRITE_OV_STRIDE", ovstride_);
     598          30 :   parse("WRITE_OV", ovfilename_);
     599          32 :   if(ovstride_>0 && ovfilename_=="") {
     600           0 :     error("With WRITE_OV_STRIDE you must specify WRITE_OV");
     601             :   }
     602             : 
     603             :   // set parallel stuff
     604          30 :   size_=comm.Get_size();
     605          30 :   rank_=comm.Get_rank();
     606             : 
     607             :   // get number of replicas
     608          30 :   if(rank_==0) {
     609          18 :     if(no_aver_) {
     610          16 :       nrep_ = 1;
     611             :     } else {
     612           2 :       nrep_ = multi_sim_comm.Get_size();
     613             :     }
     614          18 :     replica_ = multi_sim_comm.Get_rank();
     615             :   } else {
     616          12 :     nrep_ = 0;
     617          12 :     replica_ = 0;
     618             :   }
     619          30 :   comm.Sum(&nrep_,1);
     620          30 :   comm.Sum(&replica_,1);
     621             : 
     622             :   // Reweighting flag
     623          30 :   parseFlag("REWEIGHT", do_reweight_);
     624          30 :   if(do_reweight_&&getNumberOfArguments()!=1) {
     625           0 :     error("To REWEIGHT one must provide one single bias as an argument");
     626             :   }
     627          30 :   if(do_reweight_&&no_aver_) {
     628           0 :     error("REWEIGHT cannot be used with NO_AVER");
     629             :   }
     630          30 :   if(do_reweight_&&nrep_<2) {
     631           0 :     error("REWEIGHT can only be used in parallel with 2 or more replicas");
     632             :   }
     633          30 :   if(!getRestart()) {
     634          30 :     average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
     635             :   } else {
     636           0 :     average_weights_.resize(nrep_, 0.);
     637             :   }
     638             : 
     639          30 :   unsigned averaging=0;
     640          30 :   parse("AVERAGING", averaging);
     641          30 :   if(averaging>0) {
     642           2 :     decay_w_ = 1./static_cast<double> (averaging);
     643             :   }
     644             : 
     645          30 :   checkRead();
     646             : 
     647          30 :   log.printf("  atoms involved : ");
     648       16906 :   for(unsigned i=0; i<atoms.size(); ++i) {
     649       16876 :     log.printf("%d ",atoms[i].serial());
     650             :   }
     651          30 :   log.printf("\n");
     652          30 :   log.printf("  GMM data file : %s\n", GMM_file.c_str());
     653          30 :   if(no_aver_) {
     654          28 :     log.printf("  without ensemble averaging\n");
     655             :   }
     656          30 :   log.printf("  type of data noise : %s\n", noise.c_str());
     657          30 :   log.printf("  neighbor list cutoff : %lf\n", nl_cutoff_);
     658          30 :   log.printf("  neighbor list stride : %u\n",  nl_stride_);
     659          30 :   log.printf("  minimum uncertainty : %f\n",sigma_min);
     660          30 :   log.printf("  scale factor : %lf\n",scale_);
     661          30 :   if(nregres_>0) {
     662           0 :     log.printf("  regression stride : %u\n", nregres_);
     663           0 :     log.printf("  regression minimum scale : %lf\n", scale_min_);
     664           0 :     log.printf("  regression maximum scale : %lf\n", scale_max_);
     665           0 :     log.printf("  regression maximum scale MC move : %lf\n", dscale_);
     666             :   }
     667          30 :   if(noise_!=2) {
     668          24 :     log.printf("  initial value of the uncertainty : %f\n",sigma_ini);
     669          24 :     log.printf("  max MC move in uncertainty : %f\n",dsigma);
     670          24 :     log.printf("  MC stride : %u\n", MCstride_);
     671          24 :     log.printf("  reading/writing to status file : %s\n",statusfilename_.c_str());
     672          24 :     log.printf("  with stride : %u\n",statusstride_);
     673             :   }
     674          30 :   if(errfile.size()>0) {
     675           0 :     log.printf("  reading experimental errors from file : %s\n", errfile.c_str());
     676             :   }
     677          30 :   if(ovfile.size()>0) {
     678           0 :     log.printf("  reading experimental overlaps from file : %s\n", ovfile.c_str());
     679             :   }
     680          30 :   log.printf("  temperature of the system in energy unit : %f\n",kbt_);
     681          30 :   log.printf("  prior exponent : %f\n",prior_);
     682          30 :   log.printf("  number of replicas for averaging: %u\n",nrep_);
     683          30 :   log.printf("  id of the replica : %u\n",replica_);
     684          30 :   if(nanneal_>0) {
     685           4 :     log.printf("  length of annealing cycle : %u\n",nanneal_);
     686           4 :     log.printf("  annealing factor : %f\n",kanneal_);
     687             :   }
     688          30 :   if(ovstride_>0) {
     689           2 :     log.printf("  stride for writing model overlaps : %u\n",ovstride_);
     690           2 :     log.printf("  file for writing model overlaps : %s\n", ovfilename_.c_str());
     691             :   }
     692             : 
     693             :   // set constant quantity before calculating stuff
     694          30 :   cfact_ = 1.0/pow( 2.0*pi, 1.5 );
     695             : 
     696             :   // calculate model GMM constant parameters
     697          30 :   std::vector<double> GMM_m_w = get_GMM_m(atoms);
     698             : 
     699             :   // read data GMM parameters
     700          30 :   get_GMM_d(GMM_file);
     701          30 :   log.printf("  number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
     702             : 
     703             :   // normalize atom weight map
     704          30 :   if(norm_d <= 0.0) {
     705          30 :     norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
     706             :   }
     707             :   double norm_m = accumulate(GMM_m_w.begin(),  GMM_m_w.end(),  0.0);
     708             :   // renormalization
     709         150 :   for(unsigned i=0; i<GMM_m_w_.size(); ++i) {
     710         120 :     GMM_m_w_[i] *= norm_d / norm_m;
     711             :   }
     712             : 
     713             :   // read experimental errors
     714             :   std::vector<double> exp_err;
     715          30 :   if(errfile.size()>0) {
     716           0 :     exp_err = read_exp_errors(errfile);
     717             :   }
     718             : 
     719             :   // get self overlaps between data GMM components
     720          30 :   if(ovfile.size()>0) {
     721           0 :     ovdd_ = read_exp_overlaps(ovfile);
     722             :   } else {
     723        2082 :     for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
     724        2052 :       double ov = get_self_overlap(i);
     725        2052 :       ovdd_.push_back(ov);
     726             :     }
     727             :   }
     728             : 
     729          30 :   log.printf("  number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
     730             :   // cycle on GMM groups
     731          60 :   for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
     732          30 :     log.printf("    group %d\n", Gid);
     733             :     // calculate median overlap and experimental error
     734             :     std::vector<double> ovdd;
     735             :     std::vector<double> err;
     736             :     // cycle on the group members
     737        2082 :     for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
     738             :       // GMM id
     739        2052 :       int GMMid = GMM_d_grps_[Gid][i];
     740             :       // add to experimental error
     741        2052 :       if(errfile.size()>0) {
     742           0 :         err.push_back(exp_err[GMMid]);
     743             :       } else {
     744        2052 :         err.push_back(0.);
     745             :       }
     746             :       // add to GMM overlap
     747        2052 :       ovdd.push_back(ovdd_[GMMid]);
     748             :     }
     749             :     // calculate median quantities
     750          30 :     double ovdd_m = get_median(ovdd);
     751          30 :     double err_m  = get_median(err);
     752             :     // print out statistics
     753          30 :     log.printf("     # of members : %zu\n", GMM_d_grps_[Gid].size());
     754          30 :     log.printf("     median overlap : %lf\n", ovdd_m);
     755          30 :     log.printf("     median error : %lf\n", err_m);
     756             :     // add minimum value of sigma for this group of GMMs
     757          30 :     sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
     758             :     // these are only needed with Gaussian and Outliers noise models
     759          30 :     if(noise_!=2) {
     760             :       // set dsigma
     761          24 :       dsigma_.push_back(dsigma * ovdd_m);
     762             :       // set sigma max
     763          24 :       sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
     764             :       // initialize sigma
     765          48 :       sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
     766             :     }
     767             :   }
     768             : 
     769             :   // read status file if restarting
     770          30 :   if(getRestart() && noise_!=2) {
     771           0 :     read_status();
     772             :   }
     773             : 
     774             :   // calculate auxiliary stuff
     775          30 :   calculate_useful_stuff(reso);
     776             : 
     777             :   // prepare data and derivative std::vectors
     778          30 :   ovmd_.resize(ovdd_.size());
     779          30 :   ovmd_ave_.resize(ovdd_.size());
     780          30 :   atom_der_.resize(GMM_m_type_.size());
     781          30 :   GMMid_der_.resize(ovdd_.size());
     782             : 
     783             :   // clear things that are no longer needed
     784             :   GMM_d_cov_.clear();
     785             : 
     786             :   // add components
     787          30 :   addComponentWithDerivatives("scoreb");
     788          30 :   componentIsNotPeriodic("scoreb");
     789             : 
     790          30 :   if(noise_!=2) {
     791          24 :     addComponent("acc");
     792          48 :     componentIsNotPeriodic("acc");
     793             :   }
     794             : 
     795          30 :   if(nregres_>0) {
     796           0 :     addComponent("scale");
     797           0 :     componentIsNotPeriodic("scale");
     798           0 :     addComponent("accscale");
     799           0 :     componentIsNotPeriodic("accscale");
     800           0 :     addComponent("enescale");
     801           0 :     componentIsNotPeriodic("enescale");
     802             :   }
     803             : 
     804          30 :   if(nanneal_>0) {
     805           4 :     addComponent("anneal");
     806           8 :     componentIsNotPeriodic("anneal");
     807             :   }
     808             : 
     809          30 :   if(do_reweight_) {
     810           2 :     addComponent("biasDer");
     811           2 :     componentIsNotPeriodic("biasDer");
     812           2 :     addComponent("weight");
     813           4 :     componentIsNotPeriodic("weight");
     814             :   }
     815             : 
     816          30 :   addComponent("neff");
     817          30 :   componentIsNotPeriodic("neff");
     818             : 
     819          54 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     820             :     std::string num;
     821          24 :     Tools::convert(i, num);
     822          24 :     addComponent("sigma-"+num);
     823          24 :     componentIsNotPeriodic("sigma-"+num);
     824          48 :     getPntrToComponent("sigma-"+num)->set(sigma_[i]);
     825             :   }
     826             : 
     827             :   // initialize random seed
     828             :   unsigned iseed;
     829          30 :   if(rank_==0) {
     830          18 :     iseed = time(NULL)+replica_;
     831             :   } else {
     832          12 :     iseed = 0;
     833             :   }
     834          30 :   comm.Sum(&iseed, 1);
     835          30 :   random_.setSeed(-iseed);
     836             : 
     837             :   // request the atoms
     838          30 :   requestAtoms(atoms);
     839          30 :   setDerivatives();
     840             : 
     841             :   // print bibliography
     842          60 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     843          60 :   log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
     844          60 :   log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
     845          30 :   if(do_reweight_) {
     846           4 :     log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
     847             :   }
     848          30 :   if(!no_aver_ && nrep_>1) {
     849           4 :     log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
     850             :   }
     851          30 :   log<<"\n";
     852          30 : }
     853             : 
     854           4 : void EMMI::write_model_overlap(long long int step) {
     855           4 :   OFile ovfile;
     856           4 :   ovfile.link(*this);
     857             :   std::string num;
     858           4 :   Tools::convert(step,num);
     859           4 :   std::string name = ovfilename_+"-"+num;
     860           4 :   ovfile.open(name);
     861             :   ovfile.setHeavyFlush();
     862           4 :   ovfile.fmtField("%10.7e ");
     863             : // write overlaps
     864          40 :   for(int i=0; i<ovmd_.size(); ++i) {
     865          36 :     ovfile.printField("Model", ovmd_[i]);
     866          72 :     ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
     867          36 :     ovfile.printField("Data", ovdd_[i]);
     868          36 :     ovfile.printField();
     869             :   }
     870           4 :   ovfile.close();
     871           4 : }
     872             : 
     873          60 : double EMMI::get_median(std::vector<double> &v) {
     874             : // dimension of std::vector
     875          60 :   unsigned size = v.size();
     876             : // in case of only one entry
     877          60 :   if (size==1) {
     878           0 :     return v[0];
     879             :   } else {
     880             :     // reorder std::vector
     881          60 :     sort(v.begin(), v.end());
     882             :     // odd or even?
     883          60 :     if (size%2==0) {
     884           4 :       return (v[size/2-1]+v[size/2])/2.0;
     885             :     } else {
     886          56 :       return v[size/2];
     887             :     }
     888             :   }
     889             : }
     890             : 
     891           0 : void EMMI::read_status() {
     892             :   double MDtime;
     893             : // open file
     894           0 :   auto ifile = Tools::make_unique<IFile>();
     895           0 :   ifile->link(*this);
     896           0 :   if(ifile->FileExist(statusfilename_)) {
     897           0 :     ifile->open(statusfilename_);
     898           0 :     while(ifile->scanField("MD_time", MDtime)) {
     899           0 :       for(unsigned i=0; i<sigma_.size(); ++i) {
     900             :         // convert i to std::string
     901             :         std::string num;
     902           0 :         Tools::convert(i,num);
     903             :         // read entries
     904           0 :         ifile->scanField("s"+num, sigma_[i]);
     905             :       }
     906             :       // new line
     907           0 :       ifile->scanField();
     908             :     }
     909           0 :     ifile->close();
     910             :   } else {
     911           0 :     error("Cannot find status file "+statusfilename_+"\n");
     912             :   }
     913           0 : }
     914             : 
     915       12729 : void EMMI::print_status(long long int step) {
     916             : // if first time open the file
     917       12729 :   if(first_status_) {
     918          24 :     first_status_ = false;
     919          24 :     statusfile_.link(*this);
     920          24 :     statusfile_.open(statusfilename_);
     921             :     statusfile_.setHeavyFlush();
     922          48 :     statusfile_.fmtField("%6.3e ");
     923             :   }
     924             : // write fields
     925       12729 :   double MDtime = static_cast<double>(step)*getTimeStep();
     926       12729 :   statusfile_.printField("MD_time", MDtime);
     927       25458 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     928             :     // convert i to std::string
     929             :     std::string num;
     930       12729 :     Tools::convert(i,num);
     931             :     // print entry
     932       25458 :     statusfile_.printField("s"+num, sigma_[i]);
     933             :   }
     934       12729 :   statusfile_.printField();
     935       12729 : }
     936             : 
     937           0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
     938             :   bool accept = false;
     939             :   // calculate delta energy
     940           0 :   double delta = ( newE - oldE ) / kbt;
     941             :   // if delta is negative always accept move
     942           0 :   if( delta < 0.0 ) {
     943             :     accept = true;
     944             :   } else {
     945             :     // otherwise extract random number
     946           0 :     double s = random_.RandU01();
     947           0 :     if( s < std::exp(-delta) ) {
     948             :       accept = true;
     949             :     }
     950             :   }
     951           0 :   return accept;
     952             : }
     953             : 
     954           0 : void EMMI::doMonteCarlo() {
     955             :   // extract random GMM group
     956           0 :   unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
     957           0 :   if(nGMM==GMM_d_grps_.size()) {
     958           0 :     nGMM -= 1;
     959             :   }
     960             : 
     961             :   // generate random move
     962           0 :   double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
     963             :   // new sigma
     964           0 :   double new_s = sigma_[nGMM] + shift;
     965             :   // check boundaries
     966           0 :   if(new_s > sigma_max_[nGMM]) {
     967           0 :     new_s = 2.0 * sigma_max_[nGMM] - new_s;
     968             :   }
     969           0 :   if(new_s < sigma_min_[nGMM]) {
     970           0 :     new_s = 2.0 * sigma_min_[nGMM] - new_s;
     971             :   }
     972             :   // old s2
     973           0 :   double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
     974             :   // new s2
     975           0 :   double new_inv_s2 = 1.0 / new_s / new_s;
     976             : 
     977             :   // cycle on GMM group and calculate old and new energy
     978             :   double old_ene = 0.0;
     979             :   double new_ene = 0.0;
     980           0 :   double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
     981             : 
     982             :   // in case of Gaussian noise
     983           0 :   if(noise_==0) {
     984             :     double chi2 = 0.0;
     985           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
     986             :       // id GMM component
     987           0 :       int GMMid = GMM_d_grps_[nGMM][i];
     988             :       // deviation
     989           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
     990             :       // add to chi2
     991           0 :       chi2 += dev * dev;
     992             :     }
     993             :     // final energy calculation: add normalization and prior
     994           0 :     old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
     995           0 :     new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
     996             :   }
     997             : 
     998             :   // in case of Outliers noise
     999           0 :   if(noise_==1) {
    1000           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
    1001             :       // id GMM component
    1002           0 :       int GMMid = GMM_d_grps_[nGMM][i];
    1003             :       // calculate deviation
    1004           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
    1005             :       // add to energies
    1006           0 :       old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
    1007           0 :       new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
    1008             :     }
    1009             :     // final energy calculation: add normalization and prior
    1010           0 :     old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
    1011           0 :     new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
    1012             :   }
    1013             : 
    1014             :   // increment number of trials
    1015           0 :   MCtrials_ += 1.0;
    1016             : 
    1017             :   // accept or reject
    1018           0 :   bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
    1019           0 :   if(accept) {
    1020           0 :     sigma_[nGMM] = new_s;
    1021           0 :     MCaccept_ += 1.0;
    1022             :   }
    1023             :   // local communication
    1024           0 :   if(rank_!=0) {
    1025           0 :     for(unsigned i=0; i<sigma_.size(); ++i) {
    1026           0 :       sigma_[i] = 0.0;
    1027             :     }
    1028           0 :     MCaccept_ = 0.0;
    1029             :   }
    1030           0 :   if(size_>1) {
    1031           0 :     comm.Sum(&sigma_[0], sigma_.size());
    1032           0 :     comm.Sum(&MCaccept_, 1);
    1033             :   }
    1034             : 
    1035             :   // update sigma output
    1036             :   std::string num;
    1037           0 :   Tools::convert(nGMM, num);
    1038           0 :   getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
    1039           0 : }
    1040             : 
    1041           0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile) {
    1042             :   int nexp, idcomp;
    1043             :   double err;
    1044             :   std::vector<double> exp_err;
    1045             : // open file
    1046           0 :   IFile *ifile = new IFile();
    1047           0 :   if(ifile->FileExist(errfile)) {
    1048           0 :     ifile->open(errfile);
    1049             :     // scan for number of experimental errors
    1050           0 :     ifile->scanField("Nexp", nexp);
    1051             :     // cycle on GMM components
    1052           0 :     while(ifile->scanField("Id",idcomp)) {
    1053             :       // total experimental error
    1054           0 :       double err_tot = 0.0;
    1055             :       // cycle on number of experimental overlaps
    1056           0 :       for(unsigned i=0; i<nexp; ++i) {
    1057             :         std::string ss;
    1058           0 :         Tools::convert(i,ss);
    1059           0 :         ifile->scanField("Err"+ss, err);
    1060             :         // add to total error
    1061           0 :         err_tot += err*err;
    1062             :       }
    1063             :       // new line
    1064           0 :       ifile->scanField();
    1065             :       // calculate RMSE
    1066           0 :       err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
    1067             :       // add to global
    1068           0 :       exp_err.push_back(err_tot);
    1069             :     }
    1070           0 :     ifile->close();
    1071             :   } else {
    1072           0 :     error("Cannot find ERR_FILE "+errfile+"\n");
    1073             :   }
    1074           0 :   return exp_err;
    1075             : }
    1076             : 
    1077           0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile) {
    1078             :   int idcomp;
    1079             :   double ov;
    1080             :   std::vector<double> ovdd;
    1081             : // open file
    1082           0 :   IFile *ifile = new IFile();
    1083           0 :   if(ifile->FileExist(ovfile)) {
    1084           0 :     ifile->open(ovfile);
    1085             :     // cycle on GMM components
    1086           0 :     while(ifile->scanField("Id",idcomp)) {
    1087             :       // read experimental overlap
    1088           0 :       ifile->scanField("Overlap", ov);
    1089             :       // add to ovdd
    1090           0 :       ovdd.push_back(ov);
    1091             :       // new line
    1092           0 :       ifile->scanField();
    1093             :     }
    1094           0 :     ifile->close();
    1095             :   } else {
    1096           0 :     error("Cannot find OV_FILE "+ovfile+"\n");
    1097             :   }
    1098           0 :   return ovdd;
    1099             : }
    1100             : 
    1101          30 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms) {
    1102             :   // list of weights - one per atom
    1103             :   std::vector<double> GMM_m_w;
    1104             : 
    1105          30 :   auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
    1106             :   // map of atom types to A and B coefficients of scattering factor
    1107             :   // f(s) = A * exp(-B*s**2)
    1108             :   // B is in Angstrom squared
    1109             :   // map between an atom type and an index
    1110             :   std::map<std::string, unsigned> type_map;
    1111          30 :   type_map["C"]=0;
    1112          30 :   type_map["O"]=1;
    1113          30 :   type_map["N"]=2;
    1114          30 :   type_map["S"]=3;
    1115             :   // fill in sigma std::vector
    1116          30 :   GMM_m_s_.push_back(0.01*15.146);  // type 0
    1117          30 :   GMM_m_s_.push_back(0.01*8.59722); // type 1
    1118          30 :   GMM_m_s_.push_back(0.01*11.1116); // type 2
    1119          30 :   GMM_m_s_.push_back(0.01*15.8952); // type 3
    1120             :   // fill in weight std::vector
    1121          30 :   GMM_m_w_.push_back(2.49982); // type 0
    1122          30 :   GMM_m_w_.push_back(1.97692); // type 1
    1123          30 :   GMM_m_w_.push_back(2.20402); // type 2
    1124          30 :   GMM_m_w_.push_back(5.14099); // type 3
    1125             : 
    1126             :   // check if MOLINFO line is present
    1127          30 :   if( moldat ) {
    1128          30 :     log<<"  MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
    1129       16906 :     for(unsigned i=0; i<atoms.size(); ++i) {
    1130             :       // get atom name
    1131       16876 :       std::string name = moldat->getAtomName(atoms[i]);
    1132             :       char type;
    1133             :       // get atom type
    1134       16876 :       char first = name.at(0);
    1135             :       // GOLDEN RULE: type is first letter, if not a number
    1136       16876 :       if (!isdigit(first)) {
    1137             :         type = first;
    1138             :         // otherwise is the second
    1139             :       } else {
    1140           0 :         type = name.at(1);
    1141             :       }
    1142             :       // check if key in map
    1143       16876 :       std::string type_s = std::string(1,type);
    1144       16876 :       if(type_map.find(type_s) != type_map.end()) {
    1145             :         // save atom type
    1146       16876 :         GMM_m_type_.push_back(type_map[type_s]);
    1147             :         // this will be normalized in the final density
    1148       16876 :         GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
    1149             :       } else {
    1150           0 :         error("Wrong atom type "+type_s+" from atom name "+name+"\n");
    1151             :       }
    1152             :     }
    1153             :   } else {
    1154           0 :     error("MOLINFO DATA not found\n");
    1155             :   }
    1156          30 :   return GMM_m_w;
    1157             : }
    1158             : 
    1159        2052 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w) {
    1160             : 
    1161             : // check if positive defined, by calculating the 3 leading principal minors
    1162        2052 :   double pm1 = cov[0];
    1163        2052 :   double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
    1164        2052 :   double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
    1165             : // apply Sylvester’s criterion
    1166        2052 :   if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0) {
    1167           0 :     error("check data GMM: covariance matrix is not positive defined");
    1168             :   }
    1169             : 
    1170             : // check if weight is positive
    1171        2052 :   if(w<=0) {
    1172           0 :     error("check data GMM: weight must be positive");
    1173             :   }
    1174        2052 : }
    1175             : 
    1176             : // read GMM data file in PLUMED format:
    1177          30 : void EMMI::get_GMM_d(std::string GMM_file) {
    1178          30 :   VectorGeneric<6> cov;
    1179             : 
    1180             : // open file
    1181          30 :   auto ifile=Tools::make_unique<IFile>();
    1182          30 :   if(ifile->FileExist(GMM_file)) {
    1183          30 :     ifile->open(GMM_file);
    1184             :     int idcomp;
    1185        4164 :     while(ifile->scanField("Id",idcomp)) {
    1186             :       int beta;
    1187             :       double w, m0, m1, m2;
    1188        4104 :       ifile->scanField("Weight",w);
    1189        4104 :       ifile->scanField("Mean_0",m0);
    1190        4104 :       ifile->scanField("Mean_1",m1);
    1191        4104 :       ifile->scanField("Mean_2",m2);
    1192        4104 :       ifile->scanField("Cov_00",cov[0]);
    1193        4104 :       ifile->scanField("Cov_01",cov[1]);
    1194        4104 :       ifile->scanField("Cov_02",cov[2]);
    1195        4104 :       ifile->scanField("Cov_11",cov[3]);
    1196        4104 :       ifile->scanField("Cov_12",cov[4]);
    1197        4104 :       ifile->scanField("Cov_22",cov[5]);
    1198        2052 :       ifile->scanField("Beta",beta);
    1199             :       // check input
    1200        2052 :       check_GMM_d(cov, w);
    1201             :       // check beta
    1202        2052 :       if(beta<0) {
    1203           0 :         error("Beta must be positive!");
    1204             :       }
    1205             :       // center of the Gaussian
    1206        2052 :       GMM_d_m_.push_back(Vector(m0,m1,m2));
    1207             :       // covariance matrix
    1208        2052 :       GMM_d_cov_.push_back(cov);
    1209             :       // weight
    1210        2052 :       GMM_d_w_.push_back(w);
    1211             :       // beta
    1212        2052 :       GMM_d_beta_.push_back(beta);
    1213             :       // new line
    1214        2052 :       ifile->scanField();
    1215             :     }
    1216             :   } else {
    1217           0 :     error("Cannot find GMM_FILE "+GMM_file+"\n");
    1218             :   }
    1219             :   // now create a set from beta (unique set of values)
    1220          30 :   std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
    1221             :   // now prepare the group std::vector
    1222          30 :   GMM_d_grps_.resize(bu.size());
    1223             :   // and fill it in
    1224        2082 :   for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
    1225        2052 :     if(GMM_d_beta_[i]>=GMM_d_grps_.size()) {
    1226           0 :       error("Check Beta values");
    1227             :     }
    1228        2052 :     GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
    1229             :   }
    1230          30 : }
    1231             : 
    1232          30 : void EMMI::calculate_useful_stuff(double reso) {
    1233             :   // We use the following definition for resolution:
    1234             :   // the Fourier transform of the density distribution in real space
    1235             :   // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
    1236             :   // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
    1237             :   // average value of B
    1238             :   double Bave = 0.0;
    1239       16906 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1240       16876 :     Bave += GMM_m_s_[GMM_m_type_[i]];
    1241             :   }
    1242          30 :   Bave /= static_cast<double>(GMM_m_type_.size());
    1243             :   // calculate blur factor
    1244             :   double blur = 0.0;
    1245          30 :   if(reso*reso>Bave) {
    1246           2 :     blur = reso*reso-Bave;
    1247             :   } else {
    1248          56 :     warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
    1249             :   }
    1250             :   // add blur to B
    1251         150 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
    1252         120 :     GMM_m_s_[i] += blur;
    1253             :   }
    1254             :   // calculate average resolution
    1255             :   double ave_res = 0.0;
    1256       16906 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1257       16876 :     ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
    1258             :   }
    1259          30 :   ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
    1260          30 :   log.printf("  experimental map resolution : %3.2f\n", reso);
    1261          30 :   log.printf("  predicted map resolution : %3.2f\n", ave_res);
    1262          30 :   log.printf("  blur factor : %f\n", blur);
    1263             :   // now calculate useful stuff
    1264          30 :   VectorGeneric<6> cov, sum, inv_sum;
    1265             :   // cycle on all atoms types (4 for the moment)
    1266         150 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
    1267             :     // the Gaussian in density (real) space is the FT of scattering factor
    1268             :     // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
    1269         120 :     double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
    1270             :     // covariance matrix for spherical Gaussian
    1271         120 :     cov[0]=s*s;
    1272         120 :     cov[1]=0.0;
    1273         120 :     cov[2]=0.0;
    1274         120 :     cov[3]=s*s;
    1275         120 :     cov[4]=0.0;
    1276         120 :     cov[5]=s*s;
    1277             :     // cycle on all data GMM
    1278        8328 :     for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
    1279             :       // we need the sum of the covariance matrices
    1280       57456 :       for(unsigned k=0; k<6; ++k) {
    1281       49248 :         sum[k] = cov[k] + GMM_d_cov_[j][k];
    1282             :       }
    1283             :       // and to calculate its determinant
    1284        8208 :       double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1285        8208 :       det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1286        8208 :       det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1287             :       // calculate prefactor - model weights are already normalized
    1288        8208 :       double pre_fact =  cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
    1289             :       // and its inverse
    1290        8208 :       inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1291        8208 :       inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1292        8208 :       inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1293        8208 :       inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1294        8208 :       inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1295        8208 :       inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1296             :       // now we store the prefactor
    1297        8208 :       pre_fact_.push_back(pre_fact);
    1298             :       // and the inverse of the sum
    1299        8208 :       inv_cov_md_.push_back(inv_sum);
    1300             :     }
    1301             :   }
    1302             :   // tabulate exponential
    1303          30 :   dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
    1304    30000030 :   for(unsigned i=0; i<nexp_; ++i) {
    1305    30000000 :     tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
    1306             :   }
    1307          30 : }
    1308             : 
    1309             : // get prefactors
    1310     1622268 : double EMMI::get_prefactor_inverse
    1311             : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
    1312             :  double GMM_w_0, double GMM_w_1,
    1313             :  VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum) {
    1314             : // we need the sum of the covariance matrices
    1315    11355876 :   for(unsigned k=0; k<6; ++k) {
    1316     9733608 :     sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
    1317             :   }
    1318             : 
    1319             : // and to calculate its determinant
    1320     1622268 :   double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1321     1622268 :   det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1322     1622268 :   det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1323             : 
    1324             : // the prefactor is
    1325     1622268 :   double pre_fact =  cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
    1326             : 
    1327             : // and its inverse
    1328     1622268 :   inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1329     1622268 :   inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1330     1622268 :   inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1331     1622268 :   inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1332     1622268 :   inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1333     1622268 :   inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1334             : 
    1335             : // return pre-factor
    1336     1622268 :   return pre_fact;
    1337             : }
    1338             : 
    1339        2052 : double EMMI::get_self_overlap(unsigned id) {
    1340             :   double ov_tot = 0.0;
    1341        2052 :   VectorGeneric<6> sum, inv_sum;
    1342        2052 :   Vector ov_der;
    1343             : // start loop
    1344     1624320 :   for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
    1345             :     // call auxiliary method
    1346     1622268 :     double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
    1347     1622268 :                                             GMM_d_w_[id],   GMM_d_w_[i], sum, inv_sum);
    1348             :     // add overlap to ov_tot
    1349     1622268 :     ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
    1350             :   }
    1351             : // and return it
    1352        2052 :   return ov_tot;
    1353             : }
    1354             : 
    1355             : // get overlap and derivatives
    1356     3737255 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
    1357             :                          const VectorGeneric<6> &inv_cov_md, Vector &ov_der) {
    1358     3737255 :   Vector md;
    1359             :   // calculate std::vector difference m_m-d_m with/without pbc
    1360     3737255 :   if(pbc_) {
    1361           0 :     md = pbcDistance(d_m, m_m);
    1362             :   } else {
    1363     3737255 :     md = delta(d_m, m_m);
    1364             :   }
    1365             :   // calculate product of transpose of md and inv_cov_md
    1366     3737255 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1367     3737255 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1368     3737255 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1369             :   // calculate product of prod and md
    1370     3737255 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1371             :   // final calculation
    1372     3737255 :   ov = pre_fact * std::exp(-0.5*ov);
    1373             :   // derivatives
    1374     3737255 :   ov_der = ov * Vector(p_x, p_y, p_z);
    1375     3737255 :   return ov;
    1376             : }
    1377             : 
    1378             : // get the exponent of the overlap
    1379    59106708 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
    1380             :                              const VectorGeneric<6> &inv_cov_md) {
    1381    59106708 :   Vector md;
    1382             :   // calculate std::vector difference m_m-d_m with/without pbc
    1383    59106708 :   if(pbc_) {
    1384           0 :     md = pbcDistance(d_m, m_m);
    1385             :   } else {
    1386    59106708 :     md = delta(d_m, m_m);
    1387             :   }
    1388             :   // calculate product of transpose of md and inv_cov_md
    1389    59106708 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1390    59106708 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1391    59106708 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1392             :   // calculate product of prod and md
    1393    59106708 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1394    59106708 :   return ov;
    1395             : }
    1396             : 
    1397       18180 : void EMMI::update_neighbor_list() {
    1398             :   // dimension of GMM and atom std::vectors
    1399       18180 :   unsigned GMM_d_size = GMM_d_m_.size();
    1400       18180 :   unsigned GMM_m_size = GMM_m_type_.size();
    1401             :   // local neighbor list
    1402             :   std::vector < unsigned > nl_l;
    1403             :   // clear old neighbor list
    1404       18180 :   nl_.clear();
    1405             : 
    1406             :   // cycle on GMM components - in parallel
    1407      118134 :   for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
    1408             :     // overlap lists and map
    1409             :     std::vector<double> ov_l;
    1410             :     std::map<double, unsigned> ov_m;
    1411             :     // total overlap with id
    1412             :     double ov_tot = 0.0;
    1413             :     // cycle on all atoms
    1414    59206662 :     for(unsigned im=0; im<GMM_m_size; ++im) {
    1415             :       // get index in auxiliary lists
    1416    59106708 :       unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1417             :       // calculate exponent of overlap
    1418    59106708 :       double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
    1419             :       // get index of 0.5*expov in tabulated exponential
    1420    59106708 :       unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
    1421             :       // check boundaries and skip atom in case
    1422    59106708 :       if(itab >= tab_exp_.size()) {
    1423    54971808 :         continue;
    1424             :       }
    1425             :       // in case calculate overlap
    1426     4134900 :       double ov = pre_fact_[kaux] * tab_exp_[itab];
    1427             :       // add to list
    1428     4134900 :       ov_l.push_back(ov);
    1429             :       // and map to retrieve atom index
    1430     4134900 :       ov_m[ov] = im;
    1431             :       // increase ov_tot
    1432     4134900 :       ov_tot += ov;
    1433             :     }
    1434             :     // check if zero size -> ov_tot = 0
    1435       99954 :     if(ov_l.size()==0) {
    1436             :       continue;
    1437             :     }
    1438             :     // define cutoff
    1439       99878 :     double ov_cut = ov_tot * nl_cutoff_;
    1440             :     // sort ov_l in ascending order
    1441       99878 :     std::sort(ov_l.begin(), ov_l.end());
    1442             :     // integrate ov_l
    1443             :     double res = 0.0;
    1444     2146886 :     for(unsigned i=0; i<ov_l.size(); ++i) {
    1445     2146886 :       res += ov_l[i];
    1446             :       // if exceeding the cutoff for overlap, stop
    1447     2146886 :       if(res >= ov_cut) {
    1448             :         break;
    1449             :       } else {
    1450             :         ov_m.erase(ov_l[i]);
    1451             :       }
    1452             :     }
    1453             :     // now add atoms to neighborlist
    1454     2187770 :     for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it) {
    1455     2087892 :       nl_l.push_back(id*GMM_m_size+it->second);
    1456             :     }
    1457             :     // end cycle on GMM components in parallel
    1458             :   }
    1459             :   // find total dimension of neighborlist
    1460       18180 :   std::vector <int> recvcounts(size_, 0);
    1461       18180 :   recvcounts[rank_] = nl_l.size();
    1462       18180 :   comm.Sum(&recvcounts[0], size_);
    1463             :   int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
    1464             :   // resize neighbor stuff
    1465       18180 :   nl_.resize(tot_size);
    1466             :   // calculate std::vector of displacement
    1467       18180 :   std::vector<int> disp(size_);
    1468       18180 :   disp[0] = 0;
    1469             :   int rank_size = 0;
    1470       32724 :   for(unsigned i=0; i<size_-1; ++i) {
    1471       14544 :     rank_size += recvcounts[i];
    1472       14544 :     disp[i+1] = rank_size;
    1473             :   }
    1474             :   // Allgather neighbor list
    1475       18180 :   comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
    1476             :   // now resize derivatives
    1477       18180 :   ovmd_der_.resize(tot_size);
    1478       18180 : }
    1479             : 
    1480          70 : void EMMI::prepare() {
    1481          70 :   if(getExchangeStep()) {
    1482           0 :     first_time_=true;
    1483             :   }
    1484          70 : }
    1485             : 
    1486             : // overlap calculator
    1487       18220 : void EMMI::calculate_overlap() {
    1488             : 
    1489       18220 :   if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
    1490       18180 :     update_neighbor_list();
    1491       18180 :     first_time_=false;
    1492             :   }
    1493             : 
    1494             :   // clean temporary std::vectors
    1495      192892 :   for(unsigned i=0; i<ovmd_.size(); ++i) {
    1496      174672 :     ovmd_[i] = 0.0;
    1497             :   }
    1498     3525024 :   for(unsigned i=0; i<ovmd_der_.size(); ++i) {
    1499     3506804 :     ovmd_der_[i] = Vector(0,0,0);
    1500             :   }
    1501             : 
    1502             :   // we have to cycle over all model and data GMM components in the neighbor list
    1503       18220 :   unsigned GMM_d_size = GMM_d_m_.size();
    1504       18220 :   unsigned GMM_m_size = GMM_m_type_.size();
    1505     2133207 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1506             :     // get data (id) and atom (im) indexes
    1507     2114987 :     unsigned id = nl_[i] / GMM_m_size;
    1508     2114987 :     unsigned im = nl_[i] % GMM_m_size;
    1509             :     // get index in auxiliary lists
    1510     2114987 :     unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1511             :     // add overlap with im component of model GMM
    1512     2114987 :     ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
    1513     2114987 :                              inv_cov_md_[kaux], ovmd_der_[i]);
    1514             :   }
    1515             :   // communicate stuff
    1516       18220 :   if(size_>1) {
    1517       14574 :     comm.Sum(&ovmd_[0], ovmd_.size());
    1518       14574 :     comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
    1519             :   }
    1520       18220 : }
    1521             : 
    1522           0 : double EMMI::scaleEnergy(double s) {
    1523             :   double ene = 0.0;
    1524           0 :   for(unsigned i=0; i<ovdd_.size(); ++i) {
    1525           0 :     ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
    1526             :   }
    1527           0 :   return ene;
    1528             : }
    1529             : 
    1530           0 : double EMMI::doRegression() {
    1531             : // standard MC parameters
    1532             :   unsigned MCsteps = 100000;
    1533             :   double kbtmin = 1.0;
    1534             :   double kbtmax = 10.0;
    1535             :   unsigned ncold = 5000;
    1536             :   unsigned nhot = 2000;
    1537             :   double MCacc = 0.0;
    1538             :   double kbt, ebest, scale_best;
    1539             : 
    1540             : // initial value of scale factor and energy
    1541           0 :   double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
    1542           0 :   double ene = scaleEnergy(scale);
    1543             : // set best energy
    1544           0 :   ebest = ene;
    1545             : 
    1546             : // MC loop
    1547           0 :   for(unsigned istep=0; istep<MCsteps; ++istep) {
    1548             :     // get temperature
    1549           0 :     if(istep%(ncold+nhot)<ncold) {
    1550             :       kbt = kbtmin;
    1551             :     } else {
    1552             :       kbt = kbtmax;
    1553             :     }
    1554             :     // propose move in scale
    1555           0 :     double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
    1556           0 :     double new_scale = scale + ds;
    1557             :     // check boundaries
    1558           0 :     if(new_scale > scale_max_) {
    1559           0 :       new_scale = 2.0 * scale_max_ - new_scale;
    1560             :     }
    1561           0 :     if(new_scale < scale_min_) {
    1562           0 :       new_scale = 2.0 * scale_min_ - new_scale;
    1563             :     }
    1564             :     // new energy
    1565           0 :     double new_ene = scaleEnergy(new_scale);
    1566             :     // accept or reject
    1567           0 :     bool accept = doAccept(ene, new_ene, kbt);
    1568             :     // in case of acceptance
    1569           0 :     if(accept) {
    1570             :       scale = new_scale;
    1571             :       ene = new_ene;
    1572           0 :       MCacc += 1.0;
    1573             :     }
    1574             :     // save best
    1575           0 :     if(ene<ebest) {
    1576           0 :       ebest = ene;
    1577           0 :       scale_best = scale;
    1578             :     }
    1579             :   }
    1580             : // calculate acceptance
    1581           0 :   double accscale = MCacc / static_cast<double>(MCsteps);
    1582             : // global communication
    1583           0 :   if(!no_aver_ && nrep_>1) {
    1584           0 :     if(replica_!=0) {
    1585           0 :       scale_best = 0.0;
    1586           0 :       ebest = 0.0;
    1587           0 :       accscale = 0.0;
    1588             :     }
    1589           0 :     if(rank_==0) {
    1590           0 :       multi_sim_comm.Sum(&scale_best, 1);
    1591           0 :       multi_sim_comm.Sum(&ebest, 1);
    1592           0 :       multi_sim_comm.Sum(&accscale, 1);
    1593             :     }
    1594             :   }
    1595             :   // local communication
    1596           0 :   if(rank_!=0) {
    1597           0 :     scale_best = 0.0;
    1598           0 :     ebest = 0.0;
    1599           0 :     accscale = 0.0;
    1600             :   }
    1601           0 :   if(size_>1) {
    1602           0 :     comm.Sum(&scale_best, 1);
    1603           0 :     comm.Sum(&ebest, 1);
    1604           0 :     comm.Sum(&accscale, 1);
    1605             :   }
    1606             : // set scale parameters
    1607           0 :   getPntrToComponent("accscale")->set(accscale);
    1608           0 :   getPntrToComponent("enescale")->set(ebest);
    1609             : // return scale value
    1610           0 :   return scale_best;
    1611             : }
    1612             : 
    1613          20 : double EMMI::get_annealing(long long int step) {
    1614             : // default no annealing
    1615             :   double fact = 1.0;
    1616             : // position in annealing cycle
    1617          20 :   unsigned nc = step%(4*nanneal_);
    1618             : // useful doubles
    1619          20 :   double ncd = static_cast<double>(nc);
    1620          20 :   double nn  = static_cast<double>(nanneal_);
    1621             : // set fact
    1622          20 :   if(nc>=nanneal_   && nc<2*nanneal_) {
    1623           6 :     fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
    1624             :   }
    1625          20 :   if(nc>=2*nanneal_ && nc<3*nanneal_) {
    1626           4 :     fact = kanneal_;
    1627             :   }
    1628          20 :   if(nc>=3*nanneal_) {
    1629           2 :     fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
    1630             :   }
    1631          20 :   return fact;
    1632             : }
    1633             : 
    1634       18220 : void EMMI::get_weights(double &weight, double &norm, double &neff) {
    1635       18220 :   const double dnrep = static_cast<double>(nrep_);
    1636             :   // calculate the weights either from BIAS
    1637       18220 :   if(do_reweight_) {
    1638          12 :     std::vector<double> bias(nrep_,0);
    1639          12 :     if(rank_==0) {
    1640          12 :       bias[replica_] = getArgument(0);
    1641          12 :       if(nrep_>1) {
    1642          12 :         multi_sim_comm.Sum(&bias[0], nrep_);
    1643             :       }
    1644             :     }
    1645          12 :     comm.Sum(&bias[0], nrep_);
    1646             : 
    1647             :     // accumulate weights
    1648          12 :     if(!first_time_w_) {
    1649          30 :       for(unsigned i=0; i<nrep_; ++i) {
    1650          20 :         const double delta=bias[i]-average_weights_[i];
    1651             :         // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
    1652             :         // probably because we're making several calls to calculate(), causing accumulation
    1653             :         // of epsilons. Maybe we can work on a temporary copy of the action instead?
    1654          20 :         average_weights_[i]+=decay_w_*delta;
    1655             :       }
    1656             :     } else {
    1657           2 :       first_time_w_ = false;
    1658           6 :       for(unsigned i=0; i<nrep_; ++i) {
    1659           4 :         average_weights_[i] = bias[i];
    1660             :       }
    1661             :     }
    1662             : 
    1663             :     // set average back into bias and set norm to one
    1664          12 :     const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
    1665          36 :     for(unsigned i=0; i<nrep_; ++i) {
    1666          24 :       bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
    1667             :     }
    1668             :     // set local weight, norm and weight variance
    1669          12 :     weight = bias[replica_];
    1670             :     double w2=0.;
    1671          36 :     for(unsigned i=0; i<nrep_; ++i) {
    1672          24 :       w2 += bias[i]*bias[i];
    1673          24 :       norm += bias[i];
    1674             :     }
    1675          12 :     neff = norm*norm/w2;
    1676          24 :     getPntrToComponent("weight")->set(weight/norm);
    1677             :   } else {
    1678             :     // or arithmetic ones
    1679       18208 :     neff = dnrep;
    1680       18208 :     weight = 1.0;
    1681       18208 :     norm = dnrep;
    1682             :   }
    1683       18220 :   getPntrToComponent("neff")->set(neff);
    1684       18220 : }
    1685             : 
    1686       18220 : void EMMI::calculate() {
    1687             : 
    1688             : // calculate CV
    1689       18220 :   calculate_overlap();
    1690             : 
    1691             :   // rescale factor for ensemble average
    1692       18220 :   double weight = 0.;
    1693       18220 :   double neff = 0.;
    1694       18220 :   double norm = 0.;
    1695       18220 :   get_weights(weight, norm, neff);
    1696             : 
    1697             :   // in case of ensemble averaging, calculate average overlap
    1698       18220 :   if(!no_aver_ && nrep_>1) {
    1699             :     // if master node, calculate average across replicas
    1700          12 :     if(rank_==0) {
    1701       10812 :       for(unsigned i=0; i<ovmd_.size(); ++i) {
    1702       10800 :         ovmd_ave_[i] = weight / norm * ovmd_[i];
    1703             :       }
    1704          12 :       multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1705             :     } else {
    1706           0 :       for(unsigned i=0; i<ovmd_ave_.size(); ++i) {
    1707           0 :         ovmd_ave_[i] = 0.0;
    1708             :       }
    1709             :     }
    1710             :     // local communication
    1711          12 :     if(size_>1) {
    1712           0 :       comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1713             :     }
    1714             :   } else {
    1715      182080 :     for(unsigned i=0; i<ovmd_.size(); ++i) {
    1716      163872 :       ovmd_ave_[i] = ovmd_[i];
    1717             :     }
    1718             :   }
    1719             : 
    1720             :   // get time step
    1721       18220 :   long long int step = getStep();
    1722             : 
    1723             :   // do regression
    1724       18220 :   if(nregres_>0) {
    1725           0 :     if(step%nregres_==0 && !getExchangeStep()) {
    1726           0 :       scale_ = doRegression();
    1727             :     }
    1728             :     // set scale component
    1729           0 :     getPntrToComponent("scale")->set(scale_);
    1730             :   }
    1731             : 
    1732             :   // write model overlap to file
    1733       18220 :   if(ovstride_>0 && step%ovstride_==0) {
    1734           4 :     write_model_overlap(step);
    1735             :   }
    1736             : 
    1737             :   // clear energy and virial
    1738       18220 :   ene_ = 0.0;
    1739       18220 :   virial_.zero();
    1740             : 
    1741             :   // Gaussian noise
    1742       18220 :   if(noise_==0) {
    1743        7318 :     calculate_Gauss();
    1744             :   }
    1745             : 
    1746             :   // Outliers noise
    1747       18220 :   if(noise_==1) {
    1748        5451 :     calculate_Outliers();
    1749             :   }
    1750             : 
    1751             :   // Marginal noise
    1752       18220 :   if(noise_==2) {
    1753        5451 :     calculate_Marginal();
    1754             :   }
    1755             : 
    1756             :   // get annealing rescale factor
    1757       18220 :   if(nanneal_>0) {
    1758          20 :     anneal_ = get_annealing(step);
    1759          40 :     getPntrToComponent("anneal")->set(anneal_);
    1760             :   }
    1761             : 
    1762             :   // annealing rescale
    1763       18220 :   ene_ /= anneal_;
    1764             : 
    1765       18220 :   std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
    1766             :   // in case of ensemble averaging
    1767       18220 :   if(!no_aver_ && nrep_>1) {
    1768             :     // if master node, sum der_GMMid derivatives and ene
    1769          12 :     if(rank_==0) {
    1770       10812 :       for(unsigned i=0; i<GMMid_der_.size(); ++i) {
    1771       10800 :         GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
    1772             :       }
    1773          12 :       multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1774          12 :       multi_sim_comm.Sum(&ene_, 1);
    1775             :     } else {
    1776             :       // set der_GMMid derivatives and energy to zero
    1777           0 :       for(unsigned i=0; i<GMMid_der_av_.size(); ++i) {
    1778           0 :         GMMid_der_av_[i]=0.0;
    1779             :       }
    1780           0 :       ene_ = 0.0;
    1781             :     }
    1782             :     // local communication
    1783          12 :     if(size_>1) {
    1784           0 :       comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1785           0 :       comm.Sum(&ene_, 1);
    1786             :     }
    1787             :   } else {
    1788      182080 :     for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
    1789      163872 :       GMMid_der_av_[i] = GMMid_der_[i];
    1790             :     }
    1791             :   }
    1792             : 
    1793             :   // clean temporary std::vector
    1794    10979556 :   for(unsigned i=0; i<atom_der_.size(); ++i) {
    1795    10961336 :     atom_der_[i] = Vector(0,0,0);
    1796             :   }
    1797             : 
    1798             :   // get derivatives of bias with respect to atoms
    1799     2133207 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1800             :     // get indexes of data and model component
    1801     2114987 :     unsigned id = nl_[i] / GMM_m_type_.size();
    1802     2114987 :     unsigned im = nl_[i] % GMM_m_type_.size();
    1803             :     // chain rule + replica normalization
    1804     2114987 :     Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
    1805     2114987 :     Vector pos;
    1806     2114987 :     if(pbc_) {
    1807           0 :       pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
    1808             :     } else {
    1809     2114987 :       pos = getPosition(im);
    1810             :     }
    1811             :     // increment derivatives and virial
    1812     2114987 :     atom_der_[im] += tot_der;
    1813     2114987 :     virial_ += Tensor(pos, -tot_der);
    1814             :   }
    1815             : 
    1816             :   // communicate local derivatives and virial
    1817       18220 :   if(size_>1) {
    1818       14574 :     comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
    1819       14574 :     comm.Sum(virial_);
    1820             :   }
    1821             : 
    1822             :   // set derivatives, virial, and score
    1823    10979556 :   for(unsigned i=0; i<atom_der_.size(); ++i) {
    1824    21922672 :     setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
    1825             :   }
    1826       18220 :   setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
    1827       18220 :   getPntrToComponent("scoreb")->set(ene_);
    1828             : 
    1829       18220 :   if (do_reweight_) {
    1830             :     double w_tmp = 0.;
    1831       10812 :     for (unsigned i = 0; i < ovmd_.size(); ++i) {
    1832       10800 :       w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
    1833             :     }
    1834          12 :     w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
    1835             : 
    1836          12 :     setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
    1837          24 :     getPntrToComponent("biasDer")->set(w_tmp);
    1838             :   }
    1839             : 
    1840             :   // This part is needed only for Gaussian and Outliers noise models
    1841       18220 :   if(noise_!=2) {
    1842             : 
    1843             :     // do Montecarlo
    1844       12769 :     if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) {
    1845           0 :       doMonteCarlo();
    1846             :     }
    1847             : 
    1848             :     // print status
    1849       12769 :     if(step%statusstride_==0) {
    1850       12729 :       print_status(step);
    1851             :     }
    1852             : 
    1853             :     // calculate acceptance ratio
    1854       12769 :     double acc = MCaccept_ / MCtrials_;
    1855             : 
    1856             :     // set value
    1857       25538 :     getPntrToComponent("acc")->set(acc);
    1858             : 
    1859             :   }
    1860             : 
    1861       18220 : }
    1862             : 
    1863        7318 : void EMMI::calculate_Gauss() {
    1864             :   // cycle on all the GMM groups
    1865       14636 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1866             :     double eneg = 0.0;
    1867             :     // cycle on all the members of the group
    1868       83872 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1869             :       // id of the GMM component
    1870       76554 :       int GMMid = GMM_d_grps_[i][j];
    1871             :       // calculate deviation
    1872       76554 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1873             :       // add to group energy
    1874       76554 :       eneg += 0.5 * dev * dev;
    1875             :       // store derivative for later
    1876       76554 :       GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
    1877             :     }
    1878             :     // add to total energy along with normalizations and prior
    1879        7318 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1880             :   }
    1881        7318 : }
    1882             : 
    1883        5451 : void EMMI::calculate_Outliers() {
    1884             :   // cycle on all the GMM groups
    1885       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1886             :     // cycle on all the members of the group
    1887             :     double eneg = 0.0;
    1888       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1889             :       // id of the GMM component
    1890       49059 :       int GMMid = GMM_d_grps_[i][j];
    1891             :       // calculate deviation
    1892       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1893             :       // add to group energy
    1894       49059 :       eneg += std::log1p( 0.5 * dev * dev );
    1895             :       // store derivative for later
    1896       49059 :       GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
    1897             :     }
    1898             :     // add to total energy along with normalizations and prior
    1899        5451 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1900             :   }
    1901        5451 : }
    1902             : 
    1903        5451 : void EMMI::calculate_Marginal() {
    1904             :   // cycle on all the GMM groups
    1905       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1906             :     // cycle on all the members of the group
    1907       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1908             :       // id of the GMM component
    1909       49059 :       int GMMid = GMM_d_grps_[i][j];
    1910             :       // calculate deviation
    1911       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
    1912             :       // calculate errf
    1913       49059 :       double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
    1914             :       // add to group energy
    1915       49059 :       ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
    1916             :       // store derivative for later
    1917       49059 :       GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
    1918             :     }
    1919             :   }
    1920        5451 : }
    1921             : 
    1922             : }
    1923             : }

Generated by: LCOV version 1.16