Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "core/ActionRegister.h"
26 : #include "core/PlumedMain.h"
27 : #include "tools/Matrix.h"
28 : #include "core/GenericMolInfo.h"
29 : #include "core/ActionSet.h"
30 : #include "tools/File.h"
31 : #include "tools/Random.h"
32 :
33 : #include <string>
34 : #include <map>
35 : #include <numeric>
36 : #include <ctime>
37 :
38 : namespace PLMD {
39 : namespace isdb {
40 :
41 : //+PLUMEDOC ISDB_COLVAR EMMI
42 : /*
43 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
44 :
45 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
46 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
47 :
48 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
49 : GMM_FILE. We are currently working on a web server to perform
50 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
51 :
52 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
53 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
54 : the Metainference approach \cite Bonomi:2016ip .
55 :
56 : \warning
57 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
58 :
59 : \note
60 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
61 : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
62 : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
63 :
64 : \note
65 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
66 : add the NOPBC flag to the input line
67 :
68 : \par Examples
69 :
70 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
71 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
72 :
73 : \plumedfile
74 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
75 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
76 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
77 : ...
78 : \endplumedfile
79 :
80 : To accelerate the computation of the Bayesian score, one can:
81 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
82 : - calculate the restraint every other step (or more).
83 :
84 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
85 : using a GROMACS index file.
86 :
87 : The input file looks as follows:
88 :
89 : \plumedfile
90 : # include pdb info
91 : MOLINFO STRUCTURE=prot.pdb
92 :
93 : # all heavy atoms
94 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
95 :
96 : # create EMMI score
97 : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
98 :
99 : # translate into bias - apply every 2 steps
100 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
101 :
102 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
103 : \endplumedfile
104 :
105 :
106 : */
107 : //+ENDPLUMEDOC
108 :
109 : class EMMI :
110 : public ActionAtomistic,
111 : public ActionWithArguments,
112 : public ActionWithValue {
113 : private:
114 :
115 : // temperature in kbt
116 : double kbt_;
117 : // model GMM - atom types
118 : std::vector<unsigned> GMM_m_type_;
119 : // model GMM - list of atom sigmas - one per atom type
120 : std::vector<double> GMM_m_s_;
121 : // model GMM - list of atom weights - one per atom type
122 : std::vector<double> GMM_m_w_;
123 : // data GMM - means, weights, and covariances + beta option
124 : std::vector<Vector> GMM_d_m_;
125 : std::vector<double> GMM_d_w_;
126 : std::vector< VectorGeneric<6> > GMM_d_cov_;
127 : std::vector<int> GMM_d_beta_;
128 : std::vector < std::vector<int> > GMM_d_grps_;
129 : // overlaps
130 : std::vector<double> ovmd_;
131 : std::vector<double> ovdd_;
132 : std::vector<double> ovmd_ave_;
133 : // and derivatives
134 : std::vector<Vector> ovmd_der_;
135 : std::vector<Vector> atom_der_;
136 : std::vector<double> GMMid_der_;
137 : // constants
138 : double cfact_;
139 : double inv_sqrt2_, sqrt2_pi_;
140 : // metainference
141 : unsigned nrep_;
142 : unsigned replica_;
143 : std::vector<double> sigma_;
144 : std::vector<double> sigma_min_;
145 : std::vector<double> sigma_max_;
146 : std::vector<double> dsigma_;
147 : // list of prefactors for overlap between two Gaussians
148 : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
149 : std::vector<double> pre_fact_;
150 : // inverse of the sum of model and data covariances matrices
151 : std::vector< VectorGeneric<6> > inv_cov_md_;
152 : // neighbor list
153 : double nl_cutoff_;
154 : unsigned nl_stride_;
155 : bool first_time_;
156 : bool no_aver_;
157 : std::vector<unsigned> nl_;
158 : // parallel stuff
159 : unsigned size_;
160 : unsigned rank_;
161 : // pbc
162 : bool pbc_;
163 : // Monte Carlo stuff
164 : int MCstride_;
165 : double MCaccept_;
166 : double MCtrials_;
167 : Random random_;
168 : // status stuff
169 : unsigned int statusstride_;
170 : std::string statusfilename_;
171 : OFile statusfile_;
172 : bool first_status_;
173 : // regression
174 : unsigned nregres_;
175 : double scale_;
176 : double scale_min_;
177 : double scale_max_;
178 : double dscale_;
179 : // tabulated exponential
180 : double dpcutoff_;
181 : double dexp_;
182 : unsigned nexp_;
183 : std::vector<double> tab_exp_;
184 : // simulated annealing
185 : unsigned nanneal_;
186 : double kanneal_;
187 : double anneal_;
188 : // prior exponent
189 : double prior_;
190 : // noise type
191 : unsigned noise_;
192 : // total score and virial;
193 : double ene_;
194 : Tensor virial_;
195 : // model overlap file
196 : unsigned int ovstride_;
197 : std::string ovfilename_;
198 :
199 : // Reweighting additions
200 : bool do_reweight_;
201 : bool first_time_w_;
202 : std::vector<double> forces;
203 : std::vector<double> forcesToApply;
204 :
205 : // average weights
206 : double decay_w_;
207 : std::vector<double> average_weights_;
208 :
209 : // write file with model overlap
210 : void write_model_overlap(long long int step);
211 : // get median of std::vector
212 : double get_median(std::vector<double> &v);
213 : // annealing
214 : double get_annealing(long long int step);
215 : // do regression
216 : double scaleEnergy(double s);
217 : double doRegression();
218 : // read and write status
219 : void read_status();
220 : void print_status(long long int step);
221 : // accept or reject
222 : bool doAccept(double oldE, double newE, double kbt);
223 : // do MonteCarlo
224 : void doMonteCarlo();
225 : // read error file
226 : std::vector<double> read_exp_errors(const std::string & errfile);
227 : // read experimental overlaps
228 : std::vector<double> read_exp_overlaps(const std::string & ovfile);
229 : // calculate model GMM weights and covariances
230 : std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
231 : // read data GMM file
232 : void get_GMM_d(std::string gmm_file);
233 : // check GMM data
234 : void check_GMM_d(const VectorGeneric<6> &cov, double w);
235 : // auxiliary method
236 : void calculate_useful_stuff(double reso);
237 : // get pref_fact and inv_cov_md
238 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
239 : double GMM_w_0, double GMM_w_1,
240 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
241 : // calculate self overlaps between data GMM components - ovdd_
242 : double get_self_overlap(unsigned id);
243 : // calculate overlap between two Gaussians
244 : double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
245 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
246 : // calculate exponent of overlap for neighbor list update
247 : double get_exp_overlap(const Vector &m_m, const Vector &d_m,
248 : const VectorGeneric<6> &inv_cov_md);
249 : // update the neighbor list
250 : void update_neighbor_list();
251 : // calculate overlap
252 : void calculate_overlap();
253 : // Gaussian noise
254 : void calculate_Gauss();
255 : // Outliers noise
256 : void calculate_Outliers();
257 : // Marginal noise
258 : void calculate_Marginal();
259 :
260 : // See MetainferenceBase
261 : void get_weights(double &weight, double &norm, double &neff);
262 :
263 : public:
264 : static void registerKeywords( Keywords& keys );
265 : explicit EMMI(const ActionOptions&);
266 : // needed for reweighting
267 : void setDerivatives();
268 : void turnOnDerivatives() override;
269 : unsigned getNumberOfDerivatives() override;
270 : void lockRequests() override;
271 : void unlockRequests() override;
272 : void calculateNumericalDerivatives( ActionWithValue* a ) override;
273 : void apply() override;
274 : void setArgDerivatives(Value *v, const double &d);
275 : void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
276 : void setBoxDerivatives(Value*v, const Tensor&d);
277 : // active methods:
278 : void prepare() override;
279 : void calculate() override;
280 : };
281 :
282 : inline
283 30 : void EMMI::setDerivatives() {
284 : // Get appropriate number of derivatives
285 : // Derivatives are first for arguments and then for atoms
286 : unsigned nder;
287 30 : if( getNumberOfAtoms()>0 ) {
288 30 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
289 : } else {
290 0 : nder = getNumberOfArguments();
291 : }
292 :
293 : // Resize all derivative arrays
294 30 : forces.resize( nder );
295 30 : forcesToApply.resize( nder );
296 146 : for(int i=0; i<getNumberOfComponents(); ++i) {
297 116 : getPntrToComponent(i)->resizeDerivatives(nder);
298 : }
299 30 : }
300 :
301 : inline
302 22 : void EMMI::turnOnDerivatives() {
303 22 : ActionWithValue::turnOnDerivatives();
304 22 : }
305 :
306 : inline
307 80 : unsigned EMMI::getNumberOfDerivatives() {
308 80 : if( getNumberOfAtoms()>0 ) {
309 80 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
310 : }
311 0 : return getNumberOfArguments();
312 : }
313 :
314 : inline
315 70 : void EMMI::lockRequests() {
316 : ActionAtomistic::lockRequests();
317 : ActionWithArguments::lockRequests();
318 70 : }
319 :
320 : inline
321 70 : void EMMI::unlockRequests() {
322 : ActionAtomistic::unlockRequests();
323 : ActionWithArguments::unlockRequests();
324 70 : }
325 :
326 : inline
327 10 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
328 10 : if( getNumberOfArguments()>0 ) {
329 0 : ActionWithArguments::calculateNumericalDerivatives( a );
330 : }
331 10 : if( getNumberOfAtoms()>0 ) {
332 10 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
333 44 : for(int j=0; j<getNumberOfComponents(); ++j) {
334 34 : for(unsigned i=0; i<getNumberOfArguments(); ++i)
335 0 : if(getPntrToComponent(j)->hasDerivatives()) {
336 0 : save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
337 : }
338 : }
339 10 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
340 44 : for(int j=0; j<getNumberOfComponents(); ++j) {
341 34 : for(unsigned i=0; i<getNumberOfArguments(); ++i)
342 0 : if(getPntrToComponent(j)->hasDerivatives()) {
343 0 : getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
344 : }
345 : }
346 : }
347 10 : }
348 :
349 : inline
350 70 : void EMMI::apply() {
351 : bool wasforced=false;
352 70 : forcesToApply.assign(forcesToApply.size(),0.0);
353 382 : for(int i=0; i<getNumberOfComponents(); ++i) {
354 312 : if( getPntrToComponent(i)->applyForce( forces ) ) {
355 : wasforced=true;
356 0 : for(unsigned i=0; i<forces.size(); ++i) {
357 0 : forcesToApply[i]+=forces[i];
358 : }
359 : }
360 : }
361 70 : if( wasforced ) {
362 0 : addForcesOnArguments( forcesToApply );
363 0 : if( getNumberOfAtoms()>0 ) {
364 0 : setForcesOnAtoms( forcesToApply, getNumberOfArguments() );
365 : }
366 : }
367 70 : }
368 :
369 : inline
370 : void EMMI::setArgDerivatives(Value *v, const double &d) {
371 : v->addDerivative(0,d);
372 : }
373 :
374 : inline
375 10961336 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
376 10961336 : const unsigned noa=getNumberOfArguments();
377 10961336 : v->addDerivative(noa+3*i+0,d[0]);
378 10961336 : v->addDerivative(noa+3*i+1,d[1]);
379 10961336 : v->addDerivative(noa+3*i+2,d[2]);
380 10961336 : }
381 :
382 : inline
383 18220 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
384 18220 : const unsigned noa=getNumberOfArguments();
385 : const unsigned nat=getNumberOfAtoms();
386 18220 : v->addDerivative(noa+3*nat+0,d(0,0));
387 18220 : v->addDerivative(noa+3*nat+1,d(0,1));
388 18220 : v->addDerivative(noa+3*nat+2,d(0,2));
389 18220 : v->addDerivative(noa+3*nat+3,d(1,0));
390 18220 : v->addDerivative(noa+3*nat+4,d(1,1));
391 18220 : v->addDerivative(noa+3*nat+5,d(1,2));
392 18220 : v->addDerivative(noa+3*nat+6,d(2,0));
393 18220 : v->addDerivative(noa+3*nat+7,d(2,1));
394 18220 : v->addDerivative(noa+3*nat+8,d(2,2));
395 18220 : }
396 :
397 13845 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
398 :
399 34 : void EMMI::registerKeywords( Keywords& keys ) {
400 34 : Action::registerKeywords( keys );
401 34 : ActionAtomistic::registerKeywords( keys );
402 34 : ActionWithValue::registerKeywords( keys );
403 34 : ActionWithArguments::registerKeywords( keys );
404 34 : keys.use("ARG");
405 68 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
406 68 : keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
407 68 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
408 68 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
409 68 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
410 68 : keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
411 68 : keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
412 68 : keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
413 68 : keys.add("optional","SIGMA0","initial value of the uncertainty");
414 68 : keys.add("optional","DSIGMA","MC step for uncertainties");
415 68 : keys.add("optional","MC_STRIDE", "Monte Carlo stride");
416 68 : keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
417 68 : keys.add("optional","OV_FILE","file with experimental overlaps");
418 68 : keys.add("optional","NORM_DENSITY","integral of the experimental density");
419 68 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
420 68 : keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
421 68 : keys.add("optional","REGRESSION","regression stride");
422 68 : keys.add("optional","REG_SCALE_MIN","regression minimum scale");
423 68 : keys.add("optional","REG_SCALE_MAX","regression maximum scale");
424 68 : keys.add("optional","REG_DSCALE","regression maximum scale MC move");
425 68 : keys.add("optional","SCALE","scale factor");
426 68 : keys.add("optional","ANNEAL", "Length of annealing cycle");
427 68 : keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
428 68 : keys.add("optional","TEMP","temperature");
429 68 : keys.add("optional","PRIOR", "exponent of uncertainty prior");
430 68 : keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
431 68 : keys.add("optional","WRITE_OV","write a file with model overlaps");
432 68 : keys.add("optional","AVERAGING", "Averaging window for weights");
433 68 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
434 68 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
435 34 : componentsAreNotOptional(keys);
436 68 : keys.addOutputComponent("scoreb","default","Bayesian score");
437 68 : keys.addOutputComponent("acc", "NOISETYPE","MC acceptance for uncertainty");
438 68 : keys.addOutputComponent("scale", "REGRESSION","scale factor");
439 68 : keys.addOutputComponent("accscale", "REGRESSION","MC acceptance for scale regression");
440 68 : keys.addOutputComponent("enescale", "REGRESSION","MC energy for scale regression");
441 68 : keys.addOutputComponent("anneal","ANNEAL","annealing factor");
442 68 : keys.addOutputComponent("weight", "REWEIGHT", "weights of the weighted average");
443 68 : keys.addOutputComponent("biasDer", "REWEIGHT", "derivatives with respect to the bias");
444 68 : keys.addOutputComponent("sigma", "NOISETYPE", "uncertainty in the forward models and experiment");
445 68 : keys.addOutputComponent("neff", "default", "effective number of replicas");
446 34 : }
447 :
448 30 : EMMI::EMMI(const ActionOptions&ao):
449 : Action(ao),
450 : ActionAtomistic(ao),
451 : ActionWithArguments(ao),
452 : ActionWithValue(ao),
453 30 : inv_sqrt2_(0.707106781186548),
454 30 : sqrt2_pi_(0.797884560802865),
455 30 : first_time_(true), no_aver_(false), pbc_(true),
456 30 : MCstride_(1), MCaccept_(0.), MCtrials_(0.),
457 60 : statusstride_(0), first_status_(true),
458 30 : nregres_(0), scale_(1.),
459 30 : dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
460 60 : kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
461 30 : do_reweight_(false), first_time_w_(true), decay_w_(1.) {
462 : // periodic boundary conditions
463 30 : bool nopbc=!pbc_;
464 30 : parseFlag("NOPBC",nopbc);
465 30 : pbc_=!nopbc;
466 :
467 : // list of atoms
468 : std::vector<AtomNumber> atoms;
469 60 : parseAtomList("ATOMS", atoms);
470 :
471 : // file with data GMM
472 : std::string GMM_file;
473 60 : parse("GMM_FILE", GMM_file);
474 :
475 : // type of data noise
476 : std::string noise;
477 60 : parse("NOISETYPE",noise);
478 30 : if (noise=="GAUSS") {
479 18 : noise_ = 0;
480 12 : } else if(noise=="OUTLIERS") {
481 6 : noise_ = 1;
482 6 : } else if(noise=="MARGINAL") {
483 6 : noise_ = 2;
484 : } else {
485 0 : error("Unknown noise type!");
486 : }
487 :
488 : // minimum value for error
489 : double sigma_min;
490 30 : parse("SIGMA_MIN", sigma_min);
491 30 : if(sigma_min<0) {
492 0 : error("SIGMA_MIN should be greater or equal to zero");
493 : }
494 :
495 : // the following parameters must be specified with noise type 0 and 1
496 : double sigma_ini, dsigma;
497 30 : if(noise_!=2) {
498 : // initial value of the uncertainty
499 24 : parse("SIGMA0", sigma_ini);
500 24 : if(sigma_ini<=0) {
501 0 : error("you must specify a positive SIGMA0");
502 : }
503 : // MC parameters
504 24 : parse("DSIGMA", dsigma);
505 24 : if(dsigma<0) {
506 0 : error("you must specify a positive DSIGMA");
507 : }
508 24 : parse("MC_STRIDE", MCstride_);
509 24 : if(dsigma>0 && MCstride_<=0) {
510 0 : error("you must specify a positive MC_STRIDE");
511 : }
512 : // status file parameters
513 24 : parse("WRITE_STRIDE", statusstride_);
514 24 : if(statusstride_==0) {
515 0 : error("you must specify a positive WRITE_STRIDE");
516 : }
517 48 : parse("STATUS_FILE", statusfilename_);
518 24 : if(statusfilename_=="") {
519 48 : statusfilename_ = "MISTATUS"+getLabel();
520 : } else {
521 0 : statusfilename_ = statusfilename_+getLabel();
522 : }
523 : }
524 :
525 : // error file
526 : std::string errfile;
527 60 : parse("ERR_FILE", errfile);
528 :
529 : // file with experimental overlaps
530 : std::string ovfile;
531 30 : parse("OV_FILE", ovfile);
532 :
533 : // integral of the experimetal density
534 30 : double norm_d = 0.0;
535 30 : parse("NORM_DENSITY", norm_d);
536 :
537 : // temperature
538 30 : double temp=0.0;
539 30 : parse("TEMP",temp);
540 : // convert temp to kbt
541 30 : if(temp>0.0) {
542 28 : kbt_=plumed.getAtoms().getKBoltzmann()*temp;
543 : } else {
544 2 : kbt_=plumed.getAtoms().getKbT();
545 : }
546 :
547 : // exponent of uncertainty prior
548 30 : parse("PRIOR",prior_);
549 :
550 : // simulated annealing stuff
551 30 : parse("ANNEAL", nanneal_);
552 30 : parse("ANNEAL_FACT", kanneal_);
553 30 : if(nanneal_>0 && kanneal_<=1.0) {
554 0 : error("with ANNEAL, ANNEAL_FACT must be greater than 1");
555 : }
556 :
557 : // regression stride
558 30 : parse("REGRESSION",nregres_);
559 : // other regression parameters
560 30 : if(nregres_>0) {
561 0 : parse("REG_SCALE_MIN",scale_min_);
562 0 : parse("REG_SCALE_MAX",scale_max_);
563 0 : parse("REG_DSCALE",dscale_);
564 : // checks
565 0 : if(scale_max_<=scale_min_) {
566 0 : error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
567 : }
568 0 : if(dscale_<=0.) {
569 0 : error("with REGRESSION, REG_DSCALE must be positive");
570 : }
571 : }
572 :
573 : // scale factor
574 30 : parse("SCALE", scale_);
575 :
576 : // read map resolution
577 : double reso;
578 30 : parse("RESOLUTION", reso);
579 30 : if(reso<=0.) {
580 0 : error("RESOLUTION should be strictly positive");
581 : }
582 :
583 : // neighbor list stuff
584 30 : parse("NL_CUTOFF",nl_cutoff_);
585 30 : if(nl_cutoff_<=0.0) {
586 0 : error("NL_CUTOFF should be explicitly specified and positive");
587 : }
588 30 : parse("NL_STRIDE",nl_stride_);
589 30 : if(nl_stride_==0) {
590 0 : error("NL_STRIDE should be explicitly specified and positive");
591 : }
592 :
593 : // averaging or not
594 30 : parseFlag("NO_AVER",no_aver_);
595 :
596 : // write overlap file
597 30 : parse("WRITE_OV_STRIDE", ovstride_);
598 30 : parse("WRITE_OV", ovfilename_);
599 32 : if(ovstride_>0 && ovfilename_=="") {
600 0 : error("With WRITE_OV_STRIDE you must specify WRITE_OV");
601 : }
602 :
603 : // set parallel stuff
604 30 : size_=comm.Get_size();
605 30 : rank_=comm.Get_rank();
606 :
607 : // get number of replicas
608 30 : if(rank_==0) {
609 18 : if(no_aver_) {
610 16 : nrep_ = 1;
611 : } else {
612 2 : nrep_ = multi_sim_comm.Get_size();
613 : }
614 18 : replica_ = multi_sim_comm.Get_rank();
615 : } else {
616 12 : nrep_ = 0;
617 12 : replica_ = 0;
618 : }
619 30 : comm.Sum(&nrep_,1);
620 30 : comm.Sum(&replica_,1);
621 :
622 : // Reweighting flag
623 30 : parseFlag("REWEIGHT", do_reweight_);
624 30 : if(do_reweight_&&getNumberOfArguments()!=1) {
625 0 : error("To REWEIGHT one must provide one single bias as an argument");
626 : }
627 30 : if(do_reweight_&&no_aver_) {
628 0 : error("REWEIGHT cannot be used with NO_AVER");
629 : }
630 30 : if(do_reweight_&&nrep_<2) {
631 0 : error("REWEIGHT can only be used in parallel with 2 or more replicas");
632 : }
633 30 : if(!getRestart()) {
634 30 : average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
635 : } else {
636 0 : average_weights_.resize(nrep_, 0.);
637 : }
638 :
639 30 : unsigned averaging=0;
640 30 : parse("AVERAGING", averaging);
641 30 : if(averaging>0) {
642 2 : decay_w_ = 1./static_cast<double> (averaging);
643 : }
644 :
645 30 : checkRead();
646 :
647 30 : log.printf(" atoms involved : ");
648 16906 : for(unsigned i=0; i<atoms.size(); ++i) {
649 16876 : log.printf("%d ",atoms[i].serial());
650 : }
651 30 : log.printf("\n");
652 30 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
653 30 : if(no_aver_) {
654 28 : log.printf(" without ensemble averaging\n");
655 : }
656 30 : log.printf(" type of data noise : %s\n", noise.c_str());
657 30 : log.printf(" neighbor list cutoff : %lf\n", nl_cutoff_);
658 30 : log.printf(" neighbor list stride : %u\n", nl_stride_);
659 30 : log.printf(" minimum uncertainty : %f\n",sigma_min);
660 30 : log.printf(" scale factor : %lf\n",scale_);
661 30 : if(nregres_>0) {
662 0 : log.printf(" regression stride : %u\n", nregres_);
663 0 : log.printf(" regression minimum scale : %lf\n", scale_min_);
664 0 : log.printf(" regression maximum scale : %lf\n", scale_max_);
665 0 : log.printf(" regression maximum scale MC move : %lf\n", dscale_);
666 : }
667 30 : if(noise_!=2) {
668 24 : log.printf(" initial value of the uncertainty : %f\n",sigma_ini);
669 24 : log.printf(" max MC move in uncertainty : %f\n",dsigma);
670 24 : log.printf(" MC stride : %u\n", MCstride_);
671 24 : log.printf(" reading/writing to status file : %s\n",statusfilename_.c_str());
672 24 : log.printf(" with stride : %u\n",statusstride_);
673 : }
674 30 : if(errfile.size()>0) {
675 0 : log.printf(" reading experimental errors from file : %s\n", errfile.c_str());
676 : }
677 30 : if(ovfile.size()>0) {
678 0 : log.printf(" reading experimental overlaps from file : %s\n", ovfile.c_str());
679 : }
680 30 : log.printf(" temperature of the system in energy unit : %f\n",kbt_);
681 30 : log.printf(" prior exponent : %f\n",prior_);
682 30 : log.printf(" number of replicas for averaging: %u\n",nrep_);
683 30 : log.printf(" id of the replica : %u\n",replica_);
684 30 : if(nanneal_>0) {
685 4 : log.printf(" length of annealing cycle : %u\n",nanneal_);
686 4 : log.printf(" annealing factor : %f\n",kanneal_);
687 : }
688 30 : if(ovstride_>0) {
689 2 : log.printf(" stride for writing model overlaps : %u\n",ovstride_);
690 2 : log.printf(" file for writing model overlaps : %s\n", ovfilename_.c_str());
691 : }
692 :
693 : // set constant quantity before calculating stuff
694 30 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
695 :
696 : // calculate model GMM constant parameters
697 30 : std::vector<double> GMM_m_w = get_GMM_m(atoms);
698 :
699 : // read data GMM parameters
700 30 : get_GMM_d(GMM_file);
701 30 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
702 :
703 : // normalize atom weight map
704 30 : if(norm_d <= 0.0) {
705 30 : norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
706 : }
707 : double norm_m = accumulate(GMM_m_w.begin(), GMM_m_w.end(), 0.0);
708 : // renormalization
709 150 : for(unsigned i=0; i<GMM_m_w_.size(); ++i) {
710 120 : GMM_m_w_[i] *= norm_d / norm_m;
711 : }
712 :
713 : // read experimental errors
714 : std::vector<double> exp_err;
715 30 : if(errfile.size()>0) {
716 0 : exp_err = read_exp_errors(errfile);
717 : }
718 :
719 : // get self overlaps between data GMM components
720 30 : if(ovfile.size()>0) {
721 0 : ovdd_ = read_exp_overlaps(ovfile);
722 : } else {
723 2082 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
724 2052 : double ov = get_self_overlap(i);
725 2052 : ovdd_.push_back(ov);
726 : }
727 : }
728 :
729 30 : log.printf(" number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
730 : // cycle on GMM groups
731 60 : for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
732 30 : log.printf(" group %d\n", Gid);
733 : // calculate median overlap and experimental error
734 : std::vector<double> ovdd;
735 : std::vector<double> err;
736 : // cycle on the group members
737 2082 : for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
738 : // GMM id
739 2052 : int GMMid = GMM_d_grps_[Gid][i];
740 : // add to experimental error
741 2052 : if(errfile.size()>0) {
742 0 : err.push_back(exp_err[GMMid]);
743 : } else {
744 2052 : err.push_back(0.);
745 : }
746 : // add to GMM overlap
747 2052 : ovdd.push_back(ovdd_[GMMid]);
748 : }
749 : // calculate median quantities
750 30 : double ovdd_m = get_median(ovdd);
751 30 : double err_m = get_median(err);
752 : // print out statistics
753 30 : log.printf(" # of members : %zu\n", GMM_d_grps_[Gid].size());
754 30 : log.printf(" median overlap : %lf\n", ovdd_m);
755 30 : log.printf(" median error : %lf\n", err_m);
756 : // add minimum value of sigma for this group of GMMs
757 30 : sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
758 : // these are only needed with Gaussian and Outliers noise models
759 30 : if(noise_!=2) {
760 : // set dsigma
761 24 : dsigma_.push_back(dsigma * ovdd_m);
762 : // set sigma max
763 24 : sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
764 : // initialize sigma
765 48 : sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
766 : }
767 : }
768 :
769 : // read status file if restarting
770 30 : if(getRestart() && noise_!=2) {
771 0 : read_status();
772 : }
773 :
774 : // calculate auxiliary stuff
775 30 : calculate_useful_stuff(reso);
776 :
777 : // prepare data and derivative std::vectors
778 30 : ovmd_.resize(ovdd_.size());
779 30 : ovmd_ave_.resize(ovdd_.size());
780 30 : atom_der_.resize(GMM_m_type_.size());
781 30 : GMMid_der_.resize(ovdd_.size());
782 :
783 : // clear things that are no longer needed
784 : GMM_d_cov_.clear();
785 :
786 : // add components
787 30 : addComponentWithDerivatives("scoreb");
788 30 : componentIsNotPeriodic("scoreb");
789 :
790 30 : if(noise_!=2) {
791 24 : addComponent("acc");
792 48 : componentIsNotPeriodic("acc");
793 : }
794 :
795 30 : if(nregres_>0) {
796 0 : addComponent("scale");
797 0 : componentIsNotPeriodic("scale");
798 0 : addComponent("accscale");
799 0 : componentIsNotPeriodic("accscale");
800 0 : addComponent("enescale");
801 0 : componentIsNotPeriodic("enescale");
802 : }
803 :
804 30 : if(nanneal_>0) {
805 4 : addComponent("anneal");
806 8 : componentIsNotPeriodic("anneal");
807 : }
808 :
809 30 : if(do_reweight_) {
810 2 : addComponent("biasDer");
811 2 : componentIsNotPeriodic("biasDer");
812 2 : addComponent("weight");
813 4 : componentIsNotPeriodic("weight");
814 : }
815 :
816 30 : addComponent("neff");
817 30 : componentIsNotPeriodic("neff");
818 :
819 54 : for(unsigned i=0; i<sigma_.size(); ++i) {
820 : std::string num;
821 24 : Tools::convert(i, num);
822 24 : addComponent("sigma-"+num);
823 24 : componentIsNotPeriodic("sigma-"+num);
824 48 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
825 : }
826 :
827 : // initialize random seed
828 : unsigned iseed;
829 30 : if(rank_==0) {
830 18 : iseed = time(NULL)+replica_;
831 : } else {
832 12 : iseed = 0;
833 : }
834 30 : comm.Sum(&iseed, 1);
835 30 : random_.setSeed(-iseed);
836 :
837 : // request the atoms
838 30 : requestAtoms(atoms);
839 30 : setDerivatives();
840 :
841 : // print bibliography
842 60 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
843 60 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
844 60 : log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
845 30 : if(do_reweight_) {
846 4 : log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
847 : }
848 30 : if(!no_aver_ && nrep_>1) {
849 4 : log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
850 : }
851 30 : log<<"\n";
852 30 : }
853 :
854 4 : void EMMI::write_model_overlap(long long int step) {
855 4 : OFile ovfile;
856 4 : ovfile.link(*this);
857 : std::string num;
858 4 : Tools::convert(step,num);
859 4 : std::string name = ovfilename_+"-"+num;
860 4 : ovfile.open(name);
861 : ovfile.setHeavyFlush();
862 4 : ovfile.fmtField("%10.7e ");
863 : // write overlaps
864 40 : for(int i=0; i<ovmd_.size(); ++i) {
865 36 : ovfile.printField("Model", ovmd_[i]);
866 72 : ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
867 36 : ovfile.printField("Data", ovdd_[i]);
868 36 : ovfile.printField();
869 : }
870 4 : ovfile.close();
871 4 : }
872 :
873 60 : double EMMI::get_median(std::vector<double> &v) {
874 : // dimension of std::vector
875 60 : unsigned size = v.size();
876 : // in case of only one entry
877 60 : if (size==1) {
878 0 : return v[0];
879 : } else {
880 : // reorder std::vector
881 60 : sort(v.begin(), v.end());
882 : // odd or even?
883 60 : if (size%2==0) {
884 4 : return (v[size/2-1]+v[size/2])/2.0;
885 : } else {
886 56 : return v[size/2];
887 : }
888 : }
889 : }
890 :
891 0 : void EMMI::read_status() {
892 : double MDtime;
893 : // open file
894 0 : auto ifile = Tools::make_unique<IFile>();
895 0 : ifile->link(*this);
896 0 : if(ifile->FileExist(statusfilename_)) {
897 0 : ifile->open(statusfilename_);
898 0 : while(ifile->scanField("MD_time", MDtime)) {
899 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
900 : // convert i to std::string
901 : std::string num;
902 0 : Tools::convert(i,num);
903 : // read entries
904 0 : ifile->scanField("s"+num, sigma_[i]);
905 : }
906 : // new line
907 0 : ifile->scanField();
908 : }
909 0 : ifile->close();
910 : } else {
911 0 : error("Cannot find status file "+statusfilename_+"\n");
912 : }
913 0 : }
914 :
915 12729 : void EMMI::print_status(long long int step) {
916 : // if first time open the file
917 12729 : if(first_status_) {
918 24 : first_status_ = false;
919 24 : statusfile_.link(*this);
920 24 : statusfile_.open(statusfilename_);
921 : statusfile_.setHeavyFlush();
922 48 : statusfile_.fmtField("%6.3e ");
923 : }
924 : // write fields
925 12729 : double MDtime = static_cast<double>(step)*getTimeStep();
926 12729 : statusfile_.printField("MD_time", MDtime);
927 25458 : for(unsigned i=0; i<sigma_.size(); ++i) {
928 : // convert i to std::string
929 : std::string num;
930 12729 : Tools::convert(i,num);
931 : // print entry
932 25458 : statusfile_.printField("s"+num, sigma_[i]);
933 : }
934 12729 : statusfile_.printField();
935 12729 : }
936 :
937 0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
938 : bool accept = false;
939 : // calculate delta energy
940 0 : double delta = ( newE - oldE ) / kbt;
941 : // if delta is negative always accept move
942 0 : if( delta < 0.0 ) {
943 : accept = true;
944 : } else {
945 : // otherwise extract random number
946 0 : double s = random_.RandU01();
947 0 : if( s < std::exp(-delta) ) {
948 : accept = true;
949 : }
950 : }
951 0 : return accept;
952 : }
953 :
954 0 : void EMMI::doMonteCarlo() {
955 : // extract random GMM group
956 0 : unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
957 0 : if(nGMM==GMM_d_grps_.size()) {
958 0 : nGMM -= 1;
959 : }
960 :
961 : // generate random move
962 0 : double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
963 : // new sigma
964 0 : double new_s = sigma_[nGMM] + shift;
965 : // check boundaries
966 0 : if(new_s > sigma_max_[nGMM]) {
967 0 : new_s = 2.0 * sigma_max_[nGMM] - new_s;
968 : }
969 0 : if(new_s < sigma_min_[nGMM]) {
970 0 : new_s = 2.0 * sigma_min_[nGMM] - new_s;
971 : }
972 : // old s2
973 0 : double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
974 : // new s2
975 0 : double new_inv_s2 = 1.0 / new_s / new_s;
976 :
977 : // cycle on GMM group and calculate old and new energy
978 : double old_ene = 0.0;
979 : double new_ene = 0.0;
980 0 : double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
981 :
982 : // in case of Gaussian noise
983 0 : if(noise_==0) {
984 : double chi2 = 0.0;
985 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
986 : // id GMM component
987 0 : int GMMid = GMM_d_grps_[nGMM][i];
988 : // deviation
989 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
990 : // add to chi2
991 0 : chi2 += dev * dev;
992 : }
993 : // final energy calculation: add normalization and prior
994 0 : old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
995 0 : new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
996 : }
997 :
998 : // in case of Outliers noise
999 0 : if(noise_==1) {
1000 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
1001 : // id GMM component
1002 0 : int GMMid = GMM_d_grps_[nGMM][i];
1003 : // calculate deviation
1004 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
1005 : // add to energies
1006 0 : old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
1007 0 : new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
1008 : }
1009 : // final energy calculation: add normalization and prior
1010 0 : old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
1011 0 : new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
1012 : }
1013 :
1014 : // increment number of trials
1015 0 : MCtrials_ += 1.0;
1016 :
1017 : // accept or reject
1018 0 : bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
1019 0 : if(accept) {
1020 0 : sigma_[nGMM] = new_s;
1021 0 : MCaccept_ += 1.0;
1022 : }
1023 : // local communication
1024 0 : if(rank_!=0) {
1025 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
1026 0 : sigma_[i] = 0.0;
1027 : }
1028 0 : MCaccept_ = 0.0;
1029 : }
1030 0 : if(size_>1) {
1031 0 : comm.Sum(&sigma_[0], sigma_.size());
1032 0 : comm.Sum(&MCaccept_, 1);
1033 : }
1034 :
1035 : // update sigma output
1036 : std::string num;
1037 0 : Tools::convert(nGMM, num);
1038 0 : getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
1039 0 : }
1040 :
1041 0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile) {
1042 : int nexp, idcomp;
1043 : double err;
1044 : std::vector<double> exp_err;
1045 : // open file
1046 0 : IFile *ifile = new IFile();
1047 0 : if(ifile->FileExist(errfile)) {
1048 0 : ifile->open(errfile);
1049 : // scan for number of experimental errors
1050 0 : ifile->scanField("Nexp", nexp);
1051 : // cycle on GMM components
1052 0 : while(ifile->scanField("Id",idcomp)) {
1053 : // total experimental error
1054 0 : double err_tot = 0.0;
1055 : // cycle on number of experimental overlaps
1056 0 : for(unsigned i=0; i<nexp; ++i) {
1057 : std::string ss;
1058 0 : Tools::convert(i,ss);
1059 0 : ifile->scanField("Err"+ss, err);
1060 : // add to total error
1061 0 : err_tot += err*err;
1062 : }
1063 : // new line
1064 0 : ifile->scanField();
1065 : // calculate RMSE
1066 0 : err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
1067 : // add to global
1068 0 : exp_err.push_back(err_tot);
1069 : }
1070 0 : ifile->close();
1071 : } else {
1072 0 : error("Cannot find ERR_FILE "+errfile+"\n");
1073 : }
1074 0 : return exp_err;
1075 : }
1076 :
1077 0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile) {
1078 : int idcomp;
1079 : double ov;
1080 : std::vector<double> ovdd;
1081 : // open file
1082 0 : IFile *ifile = new IFile();
1083 0 : if(ifile->FileExist(ovfile)) {
1084 0 : ifile->open(ovfile);
1085 : // cycle on GMM components
1086 0 : while(ifile->scanField("Id",idcomp)) {
1087 : // read experimental overlap
1088 0 : ifile->scanField("Overlap", ov);
1089 : // add to ovdd
1090 0 : ovdd.push_back(ov);
1091 : // new line
1092 0 : ifile->scanField();
1093 : }
1094 0 : ifile->close();
1095 : } else {
1096 0 : error("Cannot find OV_FILE "+ovfile+"\n");
1097 : }
1098 0 : return ovdd;
1099 : }
1100 :
1101 30 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms) {
1102 : // list of weights - one per atom
1103 : std::vector<double> GMM_m_w;
1104 :
1105 30 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
1106 : // map of atom types to A and B coefficients of scattering factor
1107 : // f(s) = A * exp(-B*s**2)
1108 : // B is in Angstrom squared
1109 : // map between an atom type and an index
1110 : std::map<std::string, unsigned> type_map;
1111 30 : type_map["C"]=0;
1112 30 : type_map["O"]=1;
1113 30 : type_map["N"]=2;
1114 30 : type_map["S"]=3;
1115 : // fill in sigma std::vector
1116 30 : GMM_m_s_.push_back(0.01*15.146); // type 0
1117 30 : GMM_m_s_.push_back(0.01*8.59722); // type 1
1118 30 : GMM_m_s_.push_back(0.01*11.1116); // type 2
1119 30 : GMM_m_s_.push_back(0.01*15.8952); // type 3
1120 : // fill in weight std::vector
1121 30 : GMM_m_w_.push_back(2.49982); // type 0
1122 30 : GMM_m_w_.push_back(1.97692); // type 1
1123 30 : GMM_m_w_.push_back(2.20402); // type 2
1124 30 : GMM_m_w_.push_back(5.14099); // type 3
1125 :
1126 : // check if MOLINFO line is present
1127 30 : if( moldat ) {
1128 30 : log<<" MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
1129 16906 : for(unsigned i=0; i<atoms.size(); ++i) {
1130 : // get atom name
1131 16876 : std::string name = moldat->getAtomName(atoms[i]);
1132 : char type;
1133 : // get atom type
1134 16876 : char first = name.at(0);
1135 : // GOLDEN RULE: type is first letter, if not a number
1136 16876 : if (!isdigit(first)) {
1137 : type = first;
1138 : // otherwise is the second
1139 : } else {
1140 0 : type = name.at(1);
1141 : }
1142 : // check if key in map
1143 16876 : std::string type_s = std::string(1,type);
1144 16876 : if(type_map.find(type_s) != type_map.end()) {
1145 : // save atom type
1146 16876 : GMM_m_type_.push_back(type_map[type_s]);
1147 : // this will be normalized in the final density
1148 16876 : GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
1149 : } else {
1150 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
1151 : }
1152 : }
1153 : } else {
1154 0 : error("MOLINFO DATA not found\n");
1155 : }
1156 30 : return GMM_m_w;
1157 : }
1158 :
1159 2052 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w) {
1160 :
1161 : // check if positive defined, by calculating the 3 leading principal minors
1162 2052 : double pm1 = cov[0];
1163 2052 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
1164 2052 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
1165 : // apply Sylvester’s criterion
1166 2052 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0) {
1167 0 : error("check data GMM: covariance matrix is not positive defined");
1168 : }
1169 :
1170 : // check if weight is positive
1171 2052 : if(w<=0) {
1172 0 : error("check data GMM: weight must be positive");
1173 : }
1174 2052 : }
1175 :
1176 : // read GMM data file in PLUMED format:
1177 30 : void EMMI::get_GMM_d(std::string GMM_file) {
1178 30 : VectorGeneric<6> cov;
1179 :
1180 : // open file
1181 30 : auto ifile=Tools::make_unique<IFile>();
1182 30 : if(ifile->FileExist(GMM_file)) {
1183 30 : ifile->open(GMM_file);
1184 : int idcomp;
1185 4164 : while(ifile->scanField("Id",idcomp)) {
1186 : int beta;
1187 : double w, m0, m1, m2;
1188 4104 : ifile->scanField("Weight",w);
1189 4104 : ifile->scanField("Mean_0",m0);
1190 4104 : ifile->scanField("Mean_1",m1);
1191 4104 : ifile->scanField("Mean_2",m2);
1192 4104 : ifile->scanField("Cov_00",cov[0]);
1193 4104 : ifile->scanField("Cov_01",cov[1]);
1194 4104 : ifile->scanField("Cov_02",cov[2]);
1195 4104 : ifile->scanField("Cov_11",cov[3]);
1196 4104 : ifile->scanField("Cov_12",cov[4]);
1197 4104 : ifile->scanField("Cov_22",cov[5]);
1198 2052 : ifile->scanField("Beta",beta);
1199 : // check input
1200 2052 : check_GMM_d(cov, w);
1201 : // check beta
1202 2052 : if(beta<0) {
1203 0 : error("Beta must be positive!");
1204 : }
1205 : // center of the Gaussian
1206 2052 : GMM_d_m_.push_back(Vector(m0,m1,m2));
1207 : // covariance matrix
1208 2052 : GMM_d_cov_.push_back(cov);
1209 : // weight
1210 2052 : GMM_d_w_.push_back(w);
1211 : // beta
1212 2052 : GMM_d_beta_.push_back(beta);
1213 : // new line
1214 2052 : ifile->scanField();
1215 : }
1216 : } else {
1217 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
1218 : }
1219 : // now create a set from beta (unique set of values)
1220 30 : std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
1221 : // now prepare the group std::vector
1222 30 : GMM_d_grps_.resize(bu.size());
1223 : // and fill it in
1224 2082 : for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
1225 2052 : if(GMM_d_beta_[i]>=GMM_d_grps_.size()) {
1226 0 : error("Check Beta values");
1227 : }
1228 2052 : GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
1229 : }
1230 30 : }
1231 :
1232 30 : void EMMI::calculate_useful_stuff(double reso) {
1233 : // We use the following definition for resolution:
1234 : // the Fourier transform of the density distribution in real space
1235 : // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
1236 : // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
1237 : // average value of B
1238 : double Bave = 0.0;
1239 16906 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1240 16876 : Bave += GMM_m_s_[GMM_m_type_[i]];
1241 : }
1242 30 : Bave /= static_cast<double>(GMM_m_type_.size());
1243 : // calculate blur factor
1244 : double blur = 0.0;
1245 30 : if(reso*reso>Bave) {
1246 2 : blur = reso*reso-Bave;
1247 : } else {
1248 56 : warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
1249 : }
1250 : // add blur to B
1251 150 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1252 120 : GMM_m_s_[i] += blur;
1253 : }
1254 : // calculate average resolution
1255 : double ave_res = 0.0;
1256 16906 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1257 16876 : ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
1258 : }
1259 30 : ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
1260 30 : log.printf(" experimental map resolution : %3.2f\n", reso);
1261 30 : log.printf(" predicted map resolution : %3.2f\n", ave_res);
1262 30 : log.printf(" blur factor : %f\n", blur);
1263 : // now calculate useful stuff
1264 30 : VectorGeneric<6> cov, sum, inv_sum;
1265 : // cycle on all atoms types (4 for the moment)
1266 150 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1267 : // the Gaussian in density (real) space is the FT of scattering factor
1268 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
1269 120 : double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
1270 : // covariance matrix for spherical Gaussian
1271 120 : cov[0]=s*s;
1272 120 : cov[1]=0.0;
1273 120 : cov[2]=0.0;
1274 120 : cov[3]=s*s;
1275 120 : cov[4]=0.0;
1276 120 : cov[5]=s*s;
1277 : // cycle on all data GMM
1278 8328 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
1279 : // we need the sum of the covariance matrices
1280 57456 : for(unsigned k=0; k<6; ++k) {
1281 49248 : sum[k] = cov[k] + GMM_d_cov_[j][k];
1282 : }
1283 : // and to calculate its determinant
1284 8208 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1285 8208 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1286 8208 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1287 : // calculate prefactor - model weights are already normalized
1288 8208 : double pre_fact = cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
1289 : // and its inverse
1290 8208 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1291 8208 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1292 8208 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1293 8208 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1294 8208 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1295 8208 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1296 : // now we store the prefactor
1297 8208 : pre_fact_.push_back(pre_fact);
1298 : // and the inverse of the sum
1299 8208 : inv_cov_md_.push_back(inv_sum);
1300 : }
1301 : }
1302 : // tabulate exponential
1303 30 : dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
1304 30000030 : for(unsigned i=0; i<nexp_; ++i) {
1305 30000000 : tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
1306 : }
1307 30 : }
1308 :
1309 : // get prefactors
1310 1622268 : double EMMI::get_prefactor_inverse
1311 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
1312 : double GMM_w_0, double GMM_w_1,
1313 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum) {
1314 : // we need the sum of the covariance matrices
1315 11355876 : for(unsigned k=0; k<6; ++k) {
1316 9733608 : sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
1317 : }
1318 :
1319 : // and to calculate its determinant
1320 1622268 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1321 1622268 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1322 1622268 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1323 :
1324 : // the prefactor is
1325 1622268 : double pre_fact = cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
1326 :
1327 : // and its inverse
1328 1622268 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1329 1622268 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1330 1622268 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1331 1622268 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1332 1622268 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1333 1622268 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1334 :
1335 : // return pre-factor
1336 1622268 : return pre_fact;
1337 : }
1338 :
1339 2052 : double EMMI::get_self_overlap(unsigned id) {
1340 : double ov_tot = 0.0;
1341 2052 : VectorGeneric<6> sum, inv_sum;
1342 2052 : Vector ov_der;
1343 : // start loop
1344 1624320 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
1345 : // call auxiliary method
1346 1622268 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
1347 1622268 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
1348 : // add overlap to ov_tot
1349 1622268 : ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
1350 : }
1351 : // and return it
1352 2052 : return ov_tot;
1353 : }
1354 :
1355 : // get overlap and derivatives
1356 3737255 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
1357 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der) {
1358 3737255 : Vector md;
1359 : // calculate std::vector difference m_m-d_m with/without pbc
1360 3737255 : if(pbc_) {
1361 0 : md = pbcDistance(d_m, m_m);
1362 : } else {
1363 3737255 : md = delta(d_m, m_m);
1364 : }
1365 : // calculate product of transpose of md and inv_cov_md
1366 3737255 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1367 3737255 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1368 3737255 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1369 : // calculate product of prod and md
1370 3737255 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1371 : // final calculation
1372 3737255 : ov = pre_fact * std::exp(-0.5*ov);
1373 : // derivatives
1374 3737255 : ov_der = ov * Vector(p_x, p_y, p_z);
1375 3737255 : return ov;
1376 : }
1377 :
1378 : // get the exponent of the overlap
1379 59106708 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
1380 : const VectorGeneric<6> &inv_cov_md) {
1381 59106708 : Vector md;
1382 : // calculate std::vector difference m_m-d_m with/without pbc
1383 59106708 : if(pbc_) {
1384 0 : md = pbcDistance(d_m, m_m);
1385 : } else {
1386 59106708 : md = delta(d_m, m_m);
1387 : }
1388 : // calculate product of transpose of md and inv_cov_md
1389 59106708 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1390 59106708 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1391 59106708 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1392 : // calculate product of prod and md
1393 59106708 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1394 59106708 : return ov;
1395 : }
1396 :
1397 18180 : void EMMI::update_neighbor_list() {
1398 : // dimension of GMM and atom std::vectors
1399 18180 : unsigned GMM_d_size = GMM_d_m_.size();
1400 18180 : unsigned GMM_m_size = GMM_m_type_.size();
1401 : // local neighbor list
1402 : std::vector < unsigned > nl_l;
1403 : // clear old neighbor list
1404 18180 : nl_.clear();
1405 :
1406 : // cycle on GMM components - in parallel
1407 118134 : for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
1408 : // overlap lists and map
1409 : std::vector<double> ov_l;
1410 : std::map<double, unsigned> ov_m;
1411 : // total overlap with id
1412 : double ov_tot = 0.0;
1413 : // cycle on all atoms
1414 59206662 : for(unsigned im=0; im<GMM_m_size; ++im) {
1415 : // get index in auxiliary lists
1416 59106708 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1417 : // calculate exponent of overlap
1418 59106708 : double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
1419 : // get index of 0.5*expov in tabulated exponential
1420 59106708 : unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
1421 : // check boundaries and skip atom in case
1422 59106708 : if(itab >= tab_exp_.size()) {
1423 54971808 : continue;
1424 : }
1425 : // in case calculate overlap
1426 4134900 : double ov = pre_fact_[kaux] * tab_exp_[itab];
1427 : // add to list
1428 4134900 : ov_l.push_back(ov);
1429 : // and map to retrieve atom index
1430 4134900 : ov_m[ov] = im;
1431 : // increase ov_tot
1432 4134900 : ov_tot += ov;
1433 : }
1434 : // check if zero size -> ov_tot = 0
1435 99954 : if(ov_l.size()==0) {
1436 : continue;
1437 : }
1438 : // define cutoff
1439 99878 : double ov_cut = ov_tot * nl_cutoff_;
1440 : // sort ov_l in ascending order
1441 99878 : std::sort(ov_l.begin(), ov_l.end());
1442 : // integrate ov_l
1443 : double res = 0.0;
1444 2146886 : for(unsigned i=0; i<ov_l.size(); ++i) {
1445 2146886 : res += ov_l[i];
1446 : // if exceeding the cutoff for overlap, stop
1447 2146886 : if(res >= ov_cut) {
1448 : break;
1449 : } else {
1450 : ov_m.erase(ov_l[i]);
1451 : }
1452 : }
1453 : // now add atoms to neighborlist
1454 2187770 : for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it) {
1455 2087892 : nl_l.push_back(id*GMM_m_size+it->second);
1456 : }
1457 : // end cycle on GMM components in parallel
1458 : }
1459 : // find total dimension of neighborlist
1460 18180 : std::vector <int> recvcounts(size_, 0);
1461 18180 : recvcounts[rank_] = nl_l.size();
1462 18180 : comm.Sum(&recvcounts[0], size_);
1463 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
1464 : // resize neighbor stuff
1465 18180 : nl_.resize(tot_size);
1466 : // calculate std::vector of displacement
1467 18180 : std::vector<int> disp(size_);
1468 18180 : disp[0] = 0;
1469 : int rank_size = 0;
1470 32724 : for(unsigned i=0; i<size_-1; ++i) {
1471 14544 : rank_size += recvcounts[i];
1472 14544 : disp[i+1] = rank_size;
1473 : }
1474 : // Allgather neighbor list
1475 18180 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
1476 : // now resize derivatives
1477 18180 : ovmd_der_.resize(tot_size);
1478 18180 : }
1479 :
1480 70 : void EMMI::prepare() {
1481 70 : if(getExchangeStep()) {
1482 0 : first_time_=true;
1483 : }
1484 70 : }
1485 :
1486 : // overlap calculator
1487 18220 : void EMMI::calculate_overlap() {
1488 :
1489 18220 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
1490 18180 : update_neighbor_list();
1491 18180 : first_time_=false;
1492 : }
1493 :
1494 : // clean temporary std::vectors
1495 192892 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1496 174672 : ovmd_[i] = 0.0;
1497 : }
1498 3525024 : for(unsigned i=0; i<ovmd_der_.size(); ++i) {
1499 3506804 : ovmd_der_[i] = Vector(0,0,0);
1500 : }
1501 :
1502 : // we have to cycle over all model and data GMM components in the neighbor list
1503 18220 : unsigned GMM_d_size = GMM_d_m_.size();
1504 18220 : unsigned GMM_m_size = GMM_m_type_.size();
1505 2133207 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1506 : // get data (id) and atom (im) indexes
1507 2114987 : unsigned id = nl_[i] / GMM_m_size;
1508 2114987 : unsigned im = nl_[i] % GMM_m_size;
1509 : // get index in auxiliary lists
1510 2114987 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1511 : // add overlap with im component of model GMM
1512 2114987 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
1513 2114987 : inv_cov_md_[kaux], ovmd_der_[i]);
1514 : }
1515 : // communicate stuff
1516 18220 : if(size_>1) {
1517 14574 : comm.Sum(&ovmd_[0], ovmd_.size());
1518 14574 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
1519 : }
1520 18220 : }
1521 :
1522 0 : double EMMI::scaleEnergy(double s) {
1523 : double ene = 0.0;
1524 0 : for(unsigned i=0; i<ovdd_.size(); ++i) {
1525 0 : ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
1526 : }
1527 0 : return ene;
1528 : }
1529 :
1530 0 : double EMMI::doRegression() {
1531 : // standard MC parameters
1532 : unsigned MCsteps = 100000;
1533 : double kbtmin = 1.0;
1534 : double kbtmax = 10.0;
1535 : unsigned ncold = 5000;
1536 : unsigned nhot = 2000;
1537 : double MCacc = 0.0;
1538 : double kbt, ebest, scale_best;
1539 :
1540 : // initial value of scale factor and energy
1541 0 : double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
1542 0 : double ene = scaleEnergy(scale);
1543 : // set best energy
1544 0 : ebest = ene;
1545 :
1546 : // MC loop
1547 0 : for(unsigned istep=0; istep<MCsteps; ++istep) {
1548 : // get temperature
1549 0 : if(istep%(ncold+nhot)<ncold) {
1550 : kbt = kbtmin;
1551 : } else {
1552 : kbt = kbtmax;
1553 : }
1554 : // propose move in scale
1555 0 : double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
1556 0 : double new_scale = scale + ds;
1557 : // check boundaries
1558 0 : if(new_scale > scale_max_) {
1559 0 : new_scale = 2.0 * scale_max_ - new_scale;
1560 : }
1561 0 : if(new_scale < scale_min_) {
1562 0 : new_scale = 2.0 * scale_min_ - new_scale;
1563 : }
1564 : // new energy
1565 0 : double new_ene = scaleEnergy(new_scale);
1566 : // accept or reject
1567 0 : bool accept = doAccept(ene, new_ene, kbt);
1568 : // in case of acceptance
1569 0 : if(accept) {
1570 : scale = new_scale;
1571 : ene = new_ene;
1572 0 : MCacc += 1.0;
1573 : }
1574 : // save best
1575 0 : if(ene<ebest) {
1576 0 : ebest = ene;
1577 0 : scale_best = scale;
1578 : }
1579 : }
1580 : // calculate acceptance
1581 0 : double accscale = MCacc / static_cast<double>(MCsteps);
1582 : // global communication
1583 0 : if(!no_aver_ && nrep_>1) {
1584 0 : if(replica_!=0) {
1585 0 : scale_best = 0.0;
1586 0 : ebest = 0.0;
1587 0 : accscale = 0.0;
1588 : }
1589 0 : if(rank_==0) {
1590 0 : multi_sim_comm.Sum(&scale_best, 1);
1591 0 : multi_sim_comm.Sum(&ebest, 1);
1592 0 : multi_sim_comm.Sum(&accscale, 1);
1593 : }
1594 : }
1595 : // local communication
1596 0 : if(rank_!=0) {
1597 0 : scale_best = 0.0;
1598 0 : ebest = 0.0;
1599 0 : accscale = 0.0;
1600 : }
1601 0 : if(size_>1) {
1602 0 : comm.Sum(&scale_best, 1);
1603 0 : comm.Sum(&ebest, 1);
1604 0 : comm.Sum(&accscale, 1);
1605 : }
1606 : // set scale parameters
1607 0 : getPntrToComponent("accscale")->set(accscale);
1608 0 : getPntrToComponent("enescale")->set(ebest);
1609 : // return scale value
1610 0 : return scale_best;
1611 : }
1612 :
1613 20 : double EMMI::get_annealing(long long int step) {
1614 : // default no annealing
1615 : double fact = 1.0;
1616 : // position in annealing cycle
1617 20 : unsigned nc = step%(4*nanneal_);
1618 : // useful doubles
1619 20 : double ncd = static_cast<double>(nc);
1620 20 : double nn = static_cast<double>(nanneal_);
1621 : // set fact
1622 20 : if(nc>=nanneal_ && nc<2*nanneal_) {
1623 6 : fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
1624 : }
1625 20 : if(nc>=2*nanneal_ && nc<3*nanneal_) {
1626 4 : fact = kanneal_;
1627 : }
1628 20 : if(nc>=3*nanneal_) {
1629 2 : fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
1630 : }
1631 20 : return fact;
1632 : }
1633 :
1634 18220 : void EMMI::get_weights(double &weight, double &norm, double &neff) {
1635 18220 : const double dnrep = static_cast<double>(nrep_);
1636 : // calculate the weights either from BIAS
1637 18220 : if(do_reweight_) {
1638 12 : std::vector<double> bias(nrep_,0);
1639 12 : if(rank_==0) {
1640 12 : bias[replica_] = getArgument(0);
1641 12 : if(nrep_>1) {
1642 12 : multi_sim_comm.Sum(&bias[0], nrep_);
1643 : }
1644 : }
1645 12 : comm.Sum(&bias[0], nrep_);
1646 :
1647 : // accumulate weights
1648 12 : if(!first_time_w_) {
1649 30 : for(unsigned i=0; i<nrep_; ++i) {
1650 20 : const double delta=bias[i]-average_weights_[i];
1651 : // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
1652 : // probably because we're making several calls to calculate(), causing accumulation
1653 : // of epsilons. Maybe we can work on a temporary copy of the action instead?
1654 20 : average_weights_[i]+=decay_w_*delta;
1655 : }
1656 : } else {
1657 2 : first_time_w_ = false;
1658 6 : for(unsigned i=0; i<nrep_; ++i) {
1659 4 : average_weights_[i] = bias[i];
1660 : }
1661 : }
1662 :
1663 : // set average back into bias and set norm to one
1664 12 : const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
1665 36 : for(unsigned i=0; i<nrep_; ++i) {
1666 24 : bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
1667 : }
1668 : // set local weight, norm and weight variance
1669 12 : weight = bias[replica_];
1670 : double w2=0.;
1671 36 : for(unsigned i=0; i<nrep_; ++i) {
1672 24 : w2 += bias[i]*bias[i];
1673 24 : norm += bias[i];
1674 : }
1675 12 : neff = norm*norm/w2;
1676 24 : getPntrToComponent("weight")->set(weight/norm);
1677 : } else {
1678 : // or arithmetic ones
1679 18208 : neff = dnrep;
1680 18208 : weight = 1.0;
1681 18208 : norm = dnrep;
1682 : }
1683 18220 : getPntrToComponent("neff")->set(neff);
1684 18220 : }
1685 :
1686 18220 : void EMMI::calculate() {
1687 :
1688 : // calculate CV
1689 18220 : calculate_overlap();
1690 :
1691 : // rescale factor for ensemble average
1692 18220 : double weight = 0.;
1693 18220 : double neff = 0.;
1694 18220 : double norm = 0.;
1695 18220 : get_weights(weight, norm, neff);
1696 :
1697 : // in case of ensemble averaging, calculate average overlap
1698 18220 : if(!no_aver_ && nrep_>1) {
1699 : // if master node, calculate average across replicas
1700 12 : if(rank_==0) {
1701 10812 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1702 10800 : ovmd_ave_[i] = weight / norm * ovmd_[i];
1703 : }
1704 12 : multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1705 : } else {
1706 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) {
1707 0 : ovmd_ave_[i] = 0.0;
1708 : }
1709 : }
1710 : // local communication
1711 12 : if(size_>1) {
1712 0 : comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1713 : }
1714 : } else {
1715 182080 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1716 163872 : ovmd_ave_[i] = ovmd_[i];
1717 : }
1718 : }
1719 :
1720 : // get time step
1721 18220 : long long int step = getStep();
1722 :
1723 : // do regression
1724 18220 : if(nregres_>0) {
1725 0 : if(step%nregres_==0 && !getExchangeStep()) {
1726 0 : scale_ = doRegression();
1727 : }
1728 : // set scale component
1729 0 : getPntrToComponent("scale")->set(scale_);
1730 : }
1731 :
1732 : // write model overlap to file
1733 18220 : if(ovstride_>0 && step%ovstride_==0) {
1734 4 : write_model_overlap(step);
1735 : }
1736 :
1737 : // clear energy and virial
1738 18220 : ene_ = 0.0;
1739 18220 : virial_.zero();
1740 :
1741 : // Gaussian noise
1742 18220 : if(noise_==0) {
1743 7318 : calculate_Gauss();
1744 : }
1745 :
1746 : // Outliers noise
1747 18220 : if(noise_==1) {
1748 5451 : calculate_Outliers();
1749 : }
1750 :
1751 : // Marginal noise
1752 18220 : if(noise_==2) {
1753 5451 : calculate_Marginal();
1754 : }
1755 :
1756 : // get annealing rescale factor
1757 18220 : if(nanneal_>0) {
1758 20 : anneal_ = get_annealing(step);
1759 40 : getPntrToComponent("anneal")->set(anneal_);
1760 : }
1761 :
1762 : // annealing rescale
1763 18220 : ene_ /= anneal_;
1764 :
1765 18220 : std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
1766 : // in case of ensemble averaging
1767 18220 : if(!no_aver_ && nrep_>1) {
1768 : // if master node, sum der_GMMid derivatives and ene
1769 12 : if(rank_==0) {
1770 10812 : for(unsigned i=0; i<GMMid_der_.size(); ++i) {
1771 10800 : GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
1772 : }
1773 12 : multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1774 12 : multi_sim_comm.Sum(&ene_, 1);
1775 : } else {
1776 : // set der_GMMid derivatives and energy to zero
1777 0 : for(unsigned i=0; i<GMMid_der_av_.size(); ++i) {
1778 0 : GMMid_der_av_[i]=0.0;
1779 : }
1780 0 : ene_ = 0.0;
1781 : }
1782 : // local communication
1783 12 : if(size_>1) {
1784 0 : comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1785 0 : comm.Sum(&ene_, 1);
1786 : }
1787 : } else {
1788 182080 : for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
1789 163872 : GMMid_der_av_[i] = GMMid_der_[i];
1790 : }
1791 : }
1792 :
1793 : // clean temporary std::vector
1794 10979556 : for(unsigned i=0; i<atom_der_.size(); ++i) {
1795 10961336 : atom_der_[i] = Vector(0,0,0);
1796 : }
1797 :
1798 : // get derivatives of bias with respect to atoms
1799 2133207 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1800 : // get indexes of data and model component
1801 2114987 : unsigned id = nl_[i] / GMM_m_type_.size();
1802 2114987 : unsigned im = nl_[i] % GMM_m_type_.size();
1803 : // chain rule + replica normalization
1804 2114987 : Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
1805 2114987 : Vector pos;
1806 2114987 : if(pbc_) {
1807 0 : pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
1808 : } else {
1809 2114987 : pos = getPosition(im);
1810 : }
1811 : // increment derivatives and virial
1812 2114987 : atom_der_[im] += tot_der;
1813 2114987 : virial_ += Tensor(pos, -tot_der);
1814 : }
1815 :
1816 : // communicate local derivatives and virial
1817 18220 : if(size_>1) {
1818 14574 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
1819 14574 : comm.Sum(virial_);
1820 : }
1821 :
1822 : // set derivatives, virial, and score
1823 10979556 : for(unsigned i=0; i<atom_der_.size(); ++i) {
1824 21922672 : setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
1825 : }
1826 18220 : setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
1827 18220 : getPntrToComponent("scoreb")->set(ene_);
1828 :
1829 18220 : if (do_reweight_) {
1830 : double w_tmp = 0.;
1831 10812 : for (unsigned i = 0; i < ovmd_.size(); ++i) {
1832 10800 : w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
1833 : }
1834 12 : w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
1835 :
1836 12 : setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
1837 24 : getPntrToComponent("biasDer")->set(w_tmp);
1838 : }
1839 :
1840 : // This part is needed only for Gaussian and Outliers noise models
1841 18220 : if(noise_!=2) {
1842 :
1843 : // do Montecarlo
1844 12769 : if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) {
1845 0 : doMonteCarlo();
1846 : }
1847 :
1848 : // print status
1849 12769 : if(step%statusstride_==0) {
1850 12729 : print_status(step);
1851 : }
1852 :
1853 : // calculate acceptance ratio
1854 12769 : double acc = MCaccept_ / MCtrials_;
1855 :
1856 : // set value
1857 25538 : getPntrToComponent("acc")->set(acc);
1858 :
1859 : }
1860 :
1861 18220 : }
1862 :
1863 7318 : void EMMI::calculate_Gauss() {
1864 : // cycle on all the GMM groups
1865 14636 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1866 : double eneg = 0.0;
1867 : // cycle on all the members of the group
1868 83872 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1869 : // id of the GMM component
1870 76554 : int GMMid = GMM_d_grps_[i][j];
1871 : // calculate deviation
1872 76554 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1873 : // add to group energy
1874 76554 : eneg += 0.5 * dev * dev;
1875 : // store derivative for later
1876 76554 : GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
1877 : }
1878 : // add to total energy along with normalizations and prior
1879 7318 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1880 : }
1881 7318 : }
1882 :
1883 5451 : void EMMI::calculate_Outliers() {
1884 : // cycle on all the GMM groups
1885 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1886 : // cycle on all the members of the group
1887 : double eneg = 0.0;
1888 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1889 : // id of the GMM component
1890 49059 : int GMMid = GMM_d_grps_[i][j];
1891 : // calculate deviation
1892 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1893 : // add to group energy
1894 49059 : eneg += std::log1p( 0.5 * dev * dev );
1895 : // store derivative for later
1896 49059 : GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
1897 : }
1898 : // add to total energy along with normalizations and prior
1899 5451 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1900 : }
1901 5451 : }
1902 :
1903 5451 : void EMMI::calculate_Marginal() {
1904 : // cycle on all the GMM groups
1905 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1906 : // cycle on all the members of the group
1907 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1908 : // id of the GMM component
1909 49059 : int GMMid = GMM_d_grps_[i][j];
1910 : // calculate deviation
1911 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
1912 : // calculate errf
1913 49059 : double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
1914 : // add to group energy
1915 49059 : ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
1916 : // store derivative for later
1917 49059 : GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
1918 : }
1919 : }
1920 5451 : }
1921 :
1922 : }
1923 : }
|