Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2020 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "colvar/Colvar.h"
23 : #include "colvar/ActionRegister.h"
24 : #include "core/PlumedMain.h"
25 : #include "tools/Matrix.h"
26 : #include "core/SetupMolInfo.h"
27 : #include "core/ActionSet.h"
28 : #include "tools/File.h"
29 :
30 : #include <string>
31 : #include <cmath>
32 : #include <map>
33 : #include <numeric>
34 : #include <ctime>
35 : #include <sstream>
36 :
37 : using namespace std;
38 :
39 : namespace PLMD {
40 : namespace isdb {
41 :
42 : //+PLUMEDOC ISDB_COLVAR EMMI
43 : /*
44 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
45 :
46 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
47 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
48 :
49 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
50 : GMM_FILE. We are currently working on a web server to perform
51 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
52 :
53 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
54 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
55 : the Metainference approach \cite Bonomi:2016ip .
56 :
57 : \warning
58 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
59 :
60 : \note
61 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
62 : In this case, the user should add the NO_AVER flag to the input line.
63 :
64 : \note
65 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
66 : add the NOPBC flag to the input line
67 :
68 : \par Examples
69 :
70 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
71 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
72 :
73 : \verbatim
74 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
75 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
76 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
77 : ...
78 : \endverbatim
79 :
80 : To accelerate the computation of the Bayesian score, one can:
81 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
82 : - calculate the restraint every other step (or more).
83 :
84 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
85 : using a GROMACS index file.
86 :
87 : The input file looks as follows:
88 :
89 : \plumedfile
90 : # include pdb info
91 : MOLINFO STRUCTURE=prot.pdb
92 :
93 : # all heavy atoms
94 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
95 :
96 : # create EMMI score
97 : gmm: EMMI NOPBC SIGMA_MEAN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
98 :
99 : # translate into bias - apply every 2 steps
100 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
101 :
102 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
103 : \endplumedfile
104 :
105 :
106 : */
107 : //+ENDPLUMEDOC
108 :
109 12 : class EMMI : public Colvar {
110 :
111 : private:
112 :
113 : // temperature in kbt
114 : double kbt_;
115 : // model GMM - weights and atom types
116 : vector<double> GMM_m_w_;
117 : vector<unsigned> GMM_m_type_;
118 : // data GMM - means, weights, and covariances + beta option
119 : vector<Vector> GMM_d_m_;
120 : vector<double> GMM_d_w_;
121 : vector< VectorGeneric<6> > GMM_d_cov_;
122 : vector<int> GMM_d_beta_;
123 : // overlaps
124 : vector<double> ovmd_;
125 : vector<double> ovdd_;
126 : vector<double> ovmd_ave_;
127 : double ov_cut_;
128 : vector<double> ovdd_cut_;
129 : // and derivatives
130 : vector<Vector> ovmd_der_;
131 : vector<Vector> atom_der_;
132 : vector<Vector> atom_der_b_;
133 : vector<double> err_f_;
134 : // constant quantities;
135 : double cfact_;
136 : double inv_sqrt2_, sqrt2_pi_;
137 : // metainference
138 : unsigned nrep_;
139 : unsigned replica_;
140 : vector<double> sigma_mean_;
141 :
142 : // auxiliary stuff
143 : // list of atom sigmas
144 : vector<double> s_map_;
145 : // list of prefactors for overlap between two components of model and data GMM
146 : // fact_md = w_m * w_d / (2pi)**1.5 / sqrt(det_md)
147 : vector< double > fact_md_;
148 : // inverse of the sum of model and data covariances matrices
149 : vector< VectorGeneric<6> > inv_cov_md_;
150 : // neighbor list
151 : double nl_cutoff_;
152 : unsigned nl_stride_;
153 : bool first_time_, no_aver_;
154 : vector < unsigned > nl_;
155 : // parallel stuff
156 : unsigned size_;
157 : unsigned rank_;
158 :
159 : // analysis mode
160 : bool analysis_;
161 : OFile Devfile_;
162 : double nframe_;
163 :
164 : // pbc
165 : bool pbc_;
166 :
167 : // calculate model GMM weights and covariances - these are constants
168 : void get_GMM_m(vector<AtomNumber> &atoms);
169 : // read data GMM file
170 : void get_GMM_d(string gmm_file);
171 : // normalize GMM
172 : void normalize_GMM(vector<double> &w);
173 : // check GMM data
174 : void check_GMM_d(VectorGeneric<6> &cov, double w);
175 :
176 : // get auxiliary stuff
177 : void get_auxiliary_stuff();
178 : // get cutoff in overlap
179 : void get_cutoff_ov();
180 : // get fact_md and inv_cov_md
181 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
182 : double &GMM_w_0, double &GMM_w_1,
183 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
184 : // calculate self overlaps between data GMM components - ovdd_
185 : double get_self_overlap(unsigned id);
186 : // calculate overlap between two components
187 : double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
188 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
189 : double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
190 : const VectorGeneric<6> &inv_cov_md);
191 : // update the neighbor list
192 : void update_neighbor_list();
193 : // calculate overlap
194 : void calculate_overlap();
195 :
196 : public:
197 : static void registerKeywords( Keywords& keys );
198 : explicit EMMI(const ActionOptions&);
199 : // active methods:
200 : void prepare();
201 : virtual void calculate();
202 : };
203 :
204 7364 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
205 :
206 5 : void EMMI::registerKeywords( Keywords& keys ) {
207 5 : Colvar::registerKeywords( keys );
208 20 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
209 20 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
210 20 : keys.add("compulsory","TEMP","temperature");
211 15 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
212 15 : keys.addFlag("ANALYSIS",false,"run in analysis mode");
213 20 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
214 20 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
215 20 : keys.add("compulsory","SIGMA_MEAN","starting value for the uncertainty in the mean estimate");
216 5 : componentsAreNotOptional(keys);
217 20 : keys.addOutputComponent("score", "default","Bayesian score");
218 20 : keys.addOutputComponent("scoreb", "default","Beta Bayesian score");
219 5 : }
220 :
221 4 : EMMI::EMMI(const ActionOptions&ao):
222 : PLUMED_COLVAR_INIT(ao),
223 : inv_sqrt2_(0.707106781186548),
224 : sqrt2_pi_(0.797884560802865),
225 : nl_cutoff_(-1.0), nl_stride_(0),
226 : first_time_(true), no_aver_(false),
227 40 : analysis_(false), nframe_(0.0), pbc_(true)
228 : {
229 :
230 4 : bool nopbc=!pbc_;
231 8 : parseFlag("NOPBC",nopbc);
232 4 : pbc_=!nopbc;
233 :
234 : vector<AtomNumber> atoms;
235 8 : parseAtomList("ATOMS",atoms);
236 :
237 : string GMM_file;
238 8 : parse("GMM_FILE",GMM_file);
239 :
240 : // uncertainty stuff
241 : double sigma_mean;
242 8 : parse("SIGMA_MEAN",sigma_mean);
243 :
244 : // temperature
245 4 : double temp=0.0;
246 8 : parse("TEMP",temp);
247 : // convert temp to kbt
248 8 : if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
249 0 : else kbt_=plumed.getAtoms().getKbT();
250 :
251 : // neighbor list stuff
252 8 : parse("NL_CUTOFF",nl_cutoff_);
253 4 : if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
254 8 : parse("NL_STRIDE",nl_stride_);
255 4 : if(nl_stride_<=0) error("NL_STRIDE should be explicitly specified and positive");
256 :
257 8 : parseFlag("NO_AVER",no_aver_);
258 8 : parseFlag("ANALYSIS",analysis_);
259 :
260 4 : checkRead();
261 :
262 : // set parallel stuff
263 4 : size_=comm.Get_size();
264 4 : rank_=comm.Get_rank();
265 :
266 : // get number of replicas
267 4 : if(rank_==0) {
268 3 : if(no_aver_) {
269 3 : nrep_ = 1;
270 3 : replica_ = 0;
271 : } else {
272 0 : nrep_ = multi_sim_comm.Get_size();
273 0 : replica_ = multi_sim_comm.Get_rank();
274 : }
275 : } else {
276 1 : nrep_ = 0;
277 1 : replica_ = 0;
278 : }
279 4 : comm.Sum(&nrep_,1);
280 4 : comm.Sum(&replica_,1);
281 :
282 4 : log.printf(" atoms involved : ");
283 7232 : for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
284 4 : log.printf("\n");
285 8 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
286 4 : if(no_aver_) log.printf(" without ensemble averaging\n");
287 4 : log.printf(" neighbor list overlap cutoff : %lf\n", nl_cutoff_);
288 4 : log.printf(" neighbor list stride : %u\n", nl_stride_);
289 4 : log.printf(" uncertainty in the mean estimate %f\n",sigma_mean);
290 4 : log.printf(" temperature of the system in energy unit %f\n",kbt_);
291 4 : log.printf(" number of replicas %u\n",nrep_);
292 :
293 :
294 : // set constant quantity before calculating stuff
295 4 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
296 :
297 : // calculate model GMM constant parameters
298 4 : get_GMM_m(atoms);
299 :
300 : // read data GMM parameters
301 8 : get_GMM_d(GMM_file);
302 8 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
303 :
304 : // normalize GMMs
305 4 : normalize_GMM(GMM_m_w_);
306 4 : normalize_GMM(GMM_d_w_);
307 :
308 : // get self overlaps between data GMM components
309 4712 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
310 1568 : double ov = get_self_overlap(i);
311 1568 : ovdd_.push_back(ov);
312 3136 : sigma_mean_.push_back(sigma_mean*ov);
313 : }
314 :
315 : // calculate auxiliary stuff
316 4 : get_auxiliary_stuff();
317 :
318 : // get cutoff for overlap calculation - avoid millions of exp calculations
319 4 : get_cutoff_ov();
320 :
321 : // and prepare temporary vectors
322 4 : ovmd_.resize(GMM_d_w_.size());
323 4 : err_f_.resize(GMM_d_w_.size());
324 4 : atom_der_.resize(GMM_m_w_.size());
325 4 : atom_der_b_.resize(GMM_m_w_.size());
326 :
327 : // clear things that are not needed anymore
328 : GMM_d_cov_.clear();
329 :
330 : // add components
331 12 : addComponentWithDerivatives("score"); componentIsNotPeriodic("score");
332 12 : addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
333 :
334 : // request the atoms
335 4 : requestAtoms(atoms);
336 :
337 12 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
338 12 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
339 12 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
340 4 : log<<"\n";
341 4 : }
342 :
343 4 : void EMMI::get_GMM_m(vector<AtomNumber> &atoms)
344 : {
345 8 : vector<SetupMolInfo*> moldat=plumed.getActionSet().select<SetupMolInfo*>();
346 :
347 : // map of atom types to A and B coefficients of scattering factor
348 : // f(s) = A * exp(-B*s**2)
349 : // B is in Angstrom squared
350 : map<string, double> w_map;
351 8 : w_map["C"] = 2.49982; // type 0
352 8 : w_map["O"] = 1.97692; // type 1
353 8 : w_map["N"] = 2.20402; // type 2
354 8 : w_map["S"] = 5.14099; // type 3
355 : // map between an atom type and an index
356 : map<string, unsigned> type_map;
357 8 : type_map["C"]=0;
358 8 : type_map["O"]=1;
359 8 : type_map["N"]=2;
360 8 : type_map["S"]=3;
361 : // fill in the sigma vector
362 8 : s_map_.push_back(15.146);
363 8 : s_map_.push_back(8.59722);
364 8 : s_map_.push_back(11.1116);
365 8 : s_map_.push_back(15.8952);
366 :
367 : // check if MOLINFO line is present
368 4 : if( moldat.size()==1 ) {
369 4 : log<<" MOLINFO DATA found, using proper atom names\n";
370 7232 : for(unsigned i=0; i<atoms.size(); ++i) {
371 : // get atom name
372 4816 : string name = moldat[0]->getAtomName(atoms[i]);
373 : char type;
374 : // get atom type
375 2408 : char first = name.at(0);
376 : // GOLDEN RULE: type is first letter, if not a number
377 2408 : if (!isdigit(first)) {
378 : type = first;
379 : // otherwise is the second
380 : } else {
381 0 : type = name.at(1);
382 : }
383 : // check if key in map
384 2408 : std::string type_s = std::string(1,type);
385 2408 : if(w_map.find(type_s) != w_map.end()) {
386 : // save atom type
387 2408 : GMM_m_type_.push_back(type_map[type_s]);
388 : // this will be normalized to 1 in the final density
389 2408 : GMM_m_w_.push_back(w_map[type_s]);
390 : } else {
391 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
392 : }
393 : }
394 : } else {
395 0 : error("MOLINFO DATA not found\n");
396 : }
397 4 : }
398 :
399 :
400 1568 : void EMMI::check_GMM_d(VectorGeneric<6> &cov, double w)
401 : {
402 :
403 : // check if positive defined, by calculating the 3 leading principal minors
404 1568 : double pm1 = cov[0];
405 1568 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
406 1568 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
407 : // apply Sylvester’s criterion
408 1568 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
409 0 : error("check data GMM: covariance matrix is not positive defined");
410 :
411 : // check if weight is positive
412 1568 : if(w<0.0) error("check data GMM: weight must be positive");
413 1568 : }
414 :
415 :
416 : // read GMM data file in PLUMED format:
417 4 : void EMMI::get_GMM_d(string GMM_file)
418 : {
419 4 : VectorGeneric<6> cov;
420 :
421 : // open file
422 4 : std::unique_ptr<IFile> ifile(new IFile);
423 4 : if(ifile->FileExist(GMM_file)) {
424 4 : ifile->open(GMM_file);
425 : int idcomp;
426 4712 : while(ifile->scanField("Id",idcomp)) {
427 : int beta;
428 : double w, m0, m1, m2;
429 3136 : ifile->scanField("Weight",w);
430 3136 : ifile->scanField("Mean_0",m0);
431 3136 : ifile->scanField("Mean_1",m1);
432 3136 : ifile->scanField("Mean_2",m2);
433 3136 : ifile->scanField("Cov_00",cov[0]);
434 3136 : ifile->scanField("Cov_01",cov[1]);
435 3136 : ifile->scanField("Cov_02",cov[2]);
436 3136 : ifile->scanField("Cov_11",cov[3]);
437 3136 : ifile->scanField("Cov_12",cov[4]);
438 3136 : ifile->scanField("Cov_22",cov[5]);
439 3136 : ifile->scanField("Beta",beta);
440 : // check input
441 1568 : check_GMM_d(cov, w);
442 : // center of the Gaussian
443 3136 : GMM_d_m_.push_back(Vector(m0,m1,m2));
444 : // covariance matrix
445 1568 : GMM_d_cov_.push_back(cov);
446 : // weights
447 1568 : GMM_d_w_.push_back(w);
448 : // beta
449 1568 : GMM_d_beta_.push_back(beta);
450 : // new line
451 1568 : ifile->scanField();
452 : }
453 : } else {
454 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
455 : }
456 :
457 4 : }
458 :
459 : // normalize GMM to sum to 1
460 : // since all the GMM components are individually normalized, we just need to
461 : // divide each weight for the sum of the weights
462 8 : void EMMI::normalize_GMM(vector<double> &w)
463 : {
464 : double norm = accumulate(w.begin(), w.end(), 0.0);
465 11944 : for(unsigned i=0; i<w.size(); ++i) w[i] /= norm;
466 8 : }
467 :
468 4 : void EMMI::get_auxiliary_stuff()
469 : {
470 4 : VectorGeneric<6> cov, sum, inv_sum;
471 : // cycle on all atoms types
472 36 : for(unsigned i=0; i<4; ++i) {
473 : // the Gaussian in density (real) space is the FT of scattering factor
474 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
475 32 : double s = sqrt ( 0.5 * s_map_[i] ) / pi * 0.1;
476 : // covariance matrix for spherical Gaussian
477 16 : cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
478 16 : cov[3]=s*s; cov[4]=0.0;
479 16 : cov[5]=s*s;
480 : // cycle on all data GMM
481 18848 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
482 : // we need the sum of the covariance matrices
483 81536 : for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
484 : // and to calculate its determinant
485 6272 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
486 6272 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
487 6272 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
488 : // the constant part of the prefactor is
489 6272 : double pre_fact = cfact_ / sqrt(det);
490 : // and its inverse
491 6272 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
492 6272 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
493 6272 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
494 6272 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
495 6272 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
496 6272 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
497 : // now we store the pre_fact
498 6272 : fact_md_.push_back(pre_fact);
499 : // and the inverse of the sum
500 6272 : inv_cov_md_.push_back(inv_sum);
501 : }
502 : }
503 :
504 4 : }
505 :
506 : // get prefactors
507 614656 : double EMMI::get_prefactor_inverse
508 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
509 : double &GMM_w_0, double &GMM_w_1,
510 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
511 : {
512 : // we need the sum of the covariance matrices
513 4302592 : for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
514 :
515 : // and to calculate its determinant
516 614656 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
517 614656 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
518 614656 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
519 :
520 : // the prefactor is
521 614656 : double pre_fact = cfact_ / sqrt(det) * GMM_w_0 * GMM_w_1;
522 :
523 : // and its inverse
524 614656 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
525 614656 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
526 614656 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
527 614656 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
528 614656 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
529 614656 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
530 :
531 : // return pre-factor
532 614656 : return pre_fact;
533 : }
534 :
535 1568 : double EMMI::get_self_overlap(unsigned id)
536 : {
537 : vector<double> ov;
538 1568 : VectorGeneric<6> sum, inv_sum;
539 1568 : Vector ov_der;
540 : // start loop
541 1847104 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
542 : // call auxiliary method
543 614656 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
544 1229312 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
545 : // calculate overlap
546 614656 : double ov_tmp = get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
547 : // add to list
548 614656 : ov.push_back(ov_tmp);
549 : }
550 : // calculate total
551 : double ov_tot = accumulate(ov.begin(), ov.end(), 0.0);
552 : // sort in ascending order
553 : std::sort(ov.begin(), ov.end());
554 : // get cutoff = nl_cutoff_ * ov_tot
555 1568 : double ov_cut = ov_tot * nl_cutoff_;
556 : // integrate tail of ov
557 : double ov_sum = 0.0;
558 1769872 : for(unsigned i=1; i<ov.size(); ++i) {
559 590480 : ov_sum += ov[i];
560 590480 : if(ov_sum >= ov_cut) {
561 3136 : ov_cut = ov[i-1];
562 1568 : break;
563 : }
564 : }
565 : // store
566 1568 : ovdd_cut_.push_back(ov_cut);
567 : // and return it
568 1568 : return ov_tot;
569 : }
570 :
571 : // this is to avoid the calculation of millions of exp function
572 : // when updating the neighbor list using calculate_overlap
573 4 : void EMMI::get_cutoff_ov()
574 : {
575 : // temporary stuff
576 4 : unsigned GMM_d_w_size = GMM_d_w_.size();
577 : // set ov_cut_ to a huge number
578 4 : ov_cut_ = 1.0+9;
579 : // calculate minimum value needed for cutoff
580 3140 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
581 2834944 : for(unsigned j=0; j<GMM_m_w_.size(); ++j) {
582 : // get atom type
583 943936 : unsigned jtype = GMM_m_type_[j];
584 : // get index in auxiliary lists
585 943936 : unsigned kaux = jtype * GMM_d_w_size + i;
586 : // get prefactor and multiply by weights
587 3775744 : double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
588 : // calculate ov
589 943936 : double ov = ovdd_cut_[i] / pre_fact;
590 : // check
591 943936 : if(ov < ov_cut_) ov_cut_ = ov;
592 : }
593 : }
594 : // set cutoff
595 4 : ov_cut_ = -2.0 * std::log(ov_cut_);
596 4 : }
597 :
598 : // version with derivatives
599 10788184 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
600 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
601 : {
602 10788184 : Vector md;
603 : // calculate vector difference m_m-d_m with/without pbc
604 10788184 : if(pbc_) md = pbcDistance(d_m, m_m);
605 10788184 : else md = delta(d_m, m_m);
606 : // calculate product of transpose of md and inv_cov_md
607 10788184 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
608 10788184 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
609 10788184 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
610 : // calculate product of prod and md
611 10788184 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
612 : // final calculation
613 10788184 : ov = fact_md * exp(-0.5*ov);
614 : // derivatives
615 10788184 : ov_der = ov * Vector(p_x, p_y, p_z);
616 10788184 : return ov;
617 : }
618 :
619 : // fast version without derivatives and cutoff used for neighbor list
620 429018912 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
621 : const VectorGeneric<6> &inv_cov_md)
622 :
623 : {
624 429018912 : Vector md;
625 : // calculate vector difference m_m-d_m with/without pbc
626 429018912 : if(pbc_) md = pbcDistance(d_m, m_m);
627 429018912 : else md = delta(d_m, m_m);
628 : // calculate product of transpose of md and inv_cov_md
629 429018912 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
630 429018912 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
631 429018912 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
632 : // calculate product of prod and md
633 429018912 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
634 : // final calculation
635 429018912 : if( ov > ov_cut_ ) {
636 : ov = 0.0;
637 : } else {
638 24333930 : ov = fact_md * exp(-0.5*ov);
639 : }
640 429018912 : return ov;
641 : }
642 :
643 1819 : void EMMI::update_neighbor_list()
644 : {
645 : // temp stuff
646 1819 : unsigned GMM_d_w_size = GMM_d_w_.size();
647 1819 : unsigned GMM_m_w_size = GMM_m_w_.size();
648 : // local neighbor list
649 : vector < unsigned > nl_l;
650 : // clear old neighbor list
651 1819 : nl_.clear();
652 : // cycle on all overlaps (in parallel)
653 1819 : unsigned nover = GMM_d_w_size * GMM_m_w_size;
654 429020731 : for(unsigned k=rank_; k<nover; k=k+size_) {
655 : // get indexes
656 429018912 : unsigned i = k / GMM_m_w_size;
657 429018912 : unsigned j = k % GMM_m_w_size;
658 : // get atom type
659 858037824 : unsigned jtype = GMM_m_type_[j];
660 : // get index in auxiliary lists
661 429018912 : unsigned kaux = jtype * GMM_d_w_size + i;
662 : // get prefactor and multiply by weights
663 1716075648 : double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
664 : // calculate overlap
665 858037824 : double ov = get_overlap(GMM_d_m_[i], getPosition(j), pre_fact, inv_cov_md_[kaux]);
666 : // fill the neighbor list
667 429018912 : if(ov >= ovdd_cut_[i]) nl_l.push_back(k);
668 : }
669 : // find total dimension of neighborlist
670 1819 : vector <int> recvcounts(size_, 0);
671 1819 : recvcounts[rank_] = nl_l.size();
672 3638 : comm.Sum(&recvcounts[0], size_);
673 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
674 : // resize neighbor stuff
675 1819 : nl_.resize(tot_size);
676 : // calculate vector of displacement
677 1819 : vector<int> disp(size_);
678 1819 : disp[0] = 0;
679 : int rank_size = 0;
680 1823 : for(unsigned i=0; i<size_-1; ++i) {
681 4 : rank_size += recvcounts[i];
682 4 : disp[i+1] = rank_size;
683 : }
684 : // Allgather neighbor list
685 7276 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
686 : // now resize derivatives
687 1819 : ovmd_der_.resize(tot_size);
688 1819 : }
689 :
690 4 : void EMMI::prepare()
691 : {
692 4 : if(getExchangeStep()) first_time_=true;
693 4 : }
694 :
695 :
696 : // overlap calculator
697 1819 : void EMMI::calculate_overlap() {
698 :
699 1819 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
700 1819 : update_neighbor_list();
701 1819 : first_time_=false;
702 : }
703 :
704 : // clean temporary vectors
705 2142782 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
706 30541010 : for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
707 :
708 : // we have to cycle over all model and data GMM components in the neighbor list
709 1819 : unsigned GMM_d_w_size = GMM_d_w_.size();
710 1819 : unsigned GMM_m_w_size = GMM_m_w_.size();
711 20350694 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
712 : // get indexes of data and model component
713 10173528 : unsigned id = nl_[i] / GMM_m_w_size;
714 10173528 : unsigned im = nl_[i] % GMM_m_w_size;
715 : // get atom type
716 20347056 : unsigned jtype = GMM_m_type_[im];
717 : // get index in auxiliary lists
718 10173528 : unsigned kaux = jtype * GMM_d_w_size + id;
719 : // get prefactor and multiply by weights
720 40694112 : double pre_fact = fact_md_[kaux] * GMM_d_w_[id] * GMM_m_w_[im];
721 : // add overlap with im component of model GMM
722 30520584 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact,
723 : inv_cov_md_[kaux], ovmd_der_[i]);
724 : }
725 : // communicate stuff
726 3638 : comm.Sum(&ovmd_[0], ovmd_.size());
727 3638 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
728 1819 : }
729 :
730 :
731 1819 : void EMMI::calculate() {
732 :
733 : // calculate CV
734 1819 : calculate_overlap();
735 :
736 1819 : if(!analysis_) {
737 :
738 : // BIASING MODE
739 : // rescale factor for ensemble average
740 1819 : double escale = 1.0 / static_cast<double>(nrep_);
741 :
742 : // calculate average of ovmd_ across replicas
743 1819 : if(!no_aver_) {
744 0 : if(comm.Get_rank()==0) {
745 0 : multi_sim_comm.Sum(&ovmd_[0], ovmd_.size());
746 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] *= escale;
747 : } else {
748 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
749 : }
750 0 : comm.Sum(&ovmd_[0], ovmd_.size());
751 : }
752 :
753 : // calculate score and scoreb
754 : double ene = 0.0;
755 : double ene_b = 0.0;
756 2142782 : for(unsigned i=0; i<ovmd_.size(); ++i) {
757 : // calculate and store err function
758 2852192 : err_f_[i] = erf ( ( ovmd_[i]-ovdd_[i] ) * inv_sqrt2_ / sigma_mean_[i] );
759 : // energy term
760 2852192 : double ene_tmp = -kbt_ * std::log ( 0.5 / (ovmd_[i]-ovdd_[i]) * err_f_[i]) ;
761 : // increment energy
762 713048 : if(GMM_d_beta_[i] == 1) ene_b += ene_tmp;
763 0 : else ene += ene_tmp;
764 : }
765 :
766 : // multiply by number of replicas
767 1819 : ene /= escale;
768 1819 : ene_b /= escale;
769 :
770 : // clear temporary vector
771 3288752 : for(unsigned i=0; i<atom_der_.size(); ++i) {
772 2190076 : atom_der_[i] = Vector(0,0,0);
773 2190076 : atom_der_b_[i] = Vector(0,0,0);
774 : }
775 : // virial
776 1819 : Tensor virial, virialb;
777 :
778 : // get derivatives of bias with respect to atoms
779 20350694 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
780 : // get indexes of data and model component
781 20347056 : unsigned id = nl_[i] / GMM_m_w_.size();
782 10173528 : unsigned im = nl_[i] % GMM_m_w_.size();
783 : // first part of derivative
784 61041168 : double der = - kbt_/err_f_[id]*sqrt2_pi_*exp(-0.5*(ovmd_[id]-ovdd_[id])*(ovmd_[id]-ovdd_[id])/sigma_mean_[id]/sigma_mean_[id])/sigma_mean_[id];
785 : // second part
786 30520584 : der += kbt_ / (ovmd_[id]-ovdd_[id]);
787 : // chain rule
788 10173528 : Vector tot_der = der * ovmd_der_[i];
789 : // atom's position in GMM cell
790 10173528 : Vector pos;
791 10173528 : if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
792 20347056 : else pos = getPosition(im);
793 : // add derivative and virial
794 10173528 : if(GMM_d_beta_[id] == 1) {
795 10173528 : atom_der_b_[im] += tot_der;
796 10173528 : virialb += Tensor(pos, -tot_der);
797 : } else {
798 : atom_der_[im] += tot_der;
799 0 : virial += Tensor(pos, -tot_der);
800 : }
801 : }
802 :
803 : // communicate stuff
804 3638 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
805 3638 : comm.Sum(&atom_der_b_[0][0], 3*atom_der_b_.size());
806 1819 : comm.Sum(virial);
807 1819 : comm.Sum(virialb);
808 :
809 : // set derivatives
810 3288752 : for(unsigned i=0; i<atom_der_.size(); ++i) {
811 3285114 : setAtomsDerivatives(getPntrToComponent("score"), i, atom_der_[i]);
812 2190076 : setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_b_[i]);
813 : }
814 :
815 : // and set virial
816 3638 : setBoxDerivatives(getPntrToComponent("score"), virial);
817 3638 : setBoxDerivatives(getPntrToComponent("scoreb"), virialb);
818 :
819 : // set value of the score
820 3638 : getPntrToComponent("score")->set(ene);
821 : // set value of the beta score
822 3638 : getPntrToComponent("scoreb")->set(ene_b);
823 :
824 : } else {
825 :
826 : // ANALYSIS MODE
827 : // prepare stuff for the first time
828 0 : if(nframe_ <= 0.0) {
829 0 : Devfile_.link(*this);
830 0 : Devfile_.open("ovmd_deviations.dat");
831 : Devfile_.setHeavyFlush();
832 0 : Devfile_.fmtField("%12.6f");
833 0 : ovmd_ave_.resize(GMM_d_w_.size());
834 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
835 : }
836 :
837 : // increment number of frames
838 0 : nframe_ += 1.0;
839 :
840 : // add average ovmd_
841 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] += ovmd_[i];
842 :
843 : // print stuff
844 0 : for(unsigned i=0; i<ovmd_.size(); ++i) {
845 : // convert i to string
846 0 : stringstream ss;
847 : ss << i;
848 : // labels
849 0 : string label = "ovmd_" + ss.str();
850 : // print entry
851 0 : double ave = ovmd_ave_[i] / nframe_;
852 0 : double dev2 = (ave-ovdd_[i])*(ave-ovdd_[i])/ovdd_[i]/ovdd_[i];
853 0 : double dev = sqrt(dev2);
854 0 : Devfile_.printField(label, dev);
855 : }
856 0 : Devfile_.printField();
857 : }
858 :
859 1819 : }
860 :
861 : }
862 5517 : }
|