Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "bias/Bias.h"
24 : #include "bias/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "core/Atoms.h"
27 : #include "core/Value.h"
28 : #include "tools/File.h"
29 : #include "tools/OpenMP.h"
30 : #include "tools/Random.h"
31 : #include <chrono>
32 : #include <numeric>
33 :
34 : #ifndef M_PI
35 : #define M_PI 3.14159265358979323846
36 : #endif
37 :
38 : namespace PLMD {
39 : namespace isdb {
40 :
41 : //+PLUMEDOC ISDB_BIAS METAINFERENCE
42 : /*
43 : Calculates the Metainference energy for a set of experimental data.
44 :
45 : Metainference \cite Bonomi:2016ip is a Bayesian framework
46 : to model heterogeneous systems by integrating prior information with noisy, ensemble-averaged data.
47 : Metainference models a system and quantifies the level of noise in the data by considering a set of replicas of the system.
48 :
49 : Calculated experimental data are given in input as ARG while reference experimental values
50 : can be given either from fixed components of other actions using PARARG or as numbers using
51 : PARAMETERS. The default behavior is that of averaging the data over the available replicas,
52 : if this is not wanted the keyword NOENSEMBLE prevents this averaging.
53 :
54 : Metadynamics Metainference \cite Bonomi:2016ge or more in general biased Metainference requires the knowledge of
55 : biasing potential in order to calculate the weighted average. In this case the value of the bias
56 : can be provided as the last argument in ARG and adding the keyword REWEIGHT. To avoid the noise
57 : resulting from the instantaneous value of the bias the weight of each replica can be averaged
58 : over a given time using the keyword AVERAGING.
59 :
60 : The data can be averaged by using multiple replicas and weighted for a bias if present.
61 : The functional form of Metainference can be chosen among four variants selected
62 : with NOISE=GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC which correspond to modelling the noise for
63 : the arguments as a single gaussian common to all the data points, a gaussian per data
64 : point, a single long-tailed gaussian common to all the data points, a long-tailed
65 : gaussian per data point or using two distinct noises as for the most general formulation of Metainference.
66 : In this latter case the noise of the replica-averaging is gaussian (one per data point) and the noise for
67 : the comparison with the experimental data can be chosen using the keyword LIKELIHOOD
68 : between gaussian or log-normal (one per data point), furthermore the evolution of the estimated average
69 : over an infinite number of replicas is driven by DFTILDE.
70 :
71 : As for Metainference theory there are two sigma values: SIGMA_MEAN0 represents the
72 : error of calculating an average quantity using a finite set of replicas and should
73 : be set as small as possible following the guidelines for replica-averaged simulations
74 : in the framework of the Maximum Entropy Principle. Alternatively, this can be obtained
75 : automatically using the internal sigma mean optimization as introduced in \cite Lohr:2017gc
76 : (OPTSIGMAMEAN=SEM), in this second case sigma_mean is estimated from the maximum standard error
77 : of the mean either over the simulation or over a defined time using the keyword AVERAGING.
78 : SIGMA_BIAS is an uncertainty parameter, sampled by a MC algorithm in the bounded interval
79 : defined by SIGMA_MIN and SIGMA_MAX. The initial value is set at SIGMA0. The MC move is a
80 : random displacement of maximum value equal to DSIGMA. If the number of data points is
81 : too large and the acceptance rate drops it is possible to make the MC move over mutually
82 : exclusive, random subset of size MC_CHUNKSIZE and run more than one move setting MC_STEPS
83 : in such a way that MC_CHUNKSIZE*MC_STEPS will cover all the data points.
84 :
85 : Calculated and experimental data can be compared modulo a scaling factor and/or an offset
86 : using SCALEDATA and/or ADDOFFSET, the sampling is obtained by a MC algorithm either using
87 : a flat or a gaussian prior setting it with SCALE_PRIOR or OFFSET_PRIOR.
88 :
89 : \par Examples
90 :
91 : In the following example we calculate a set of \ref RDC, take the replica-average of
92 : them and compare them with a set of experimental values. RDCs are compared with
93 : the experimental data but for a multiplication factor SCALE that is also sampled by
94 : MC on-the-fly
95 :
96 : \plumedfile
97 : RDC ...
98 : LABEL=rdc
99 : SCALE=0.0001
100 : GYROM=-72.5388
101 : ATOMS1=22,23
102 : ATOMS2=25,27
103 : ATOMS3=29,31
104 : ATOMS4=33,34
105 : ... RDC
106 :
107 : METAINFERENCE ...
108 : ARG=rdc.*
109 : NOISETYPE=MGAUSS
110 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
111 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
112 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
113 : SIGMA_MEAN0=0.001
114 : LABEL=spe
115 : ... METAINFERENCE
116 :
117 : PRINT ARG=spe.bias FILE=BIAS STRIDE=1
118 : \endplumedfile
119 :
120 : in the following example instead of using one uncertainty parameter per data point we use
121 : a single uncertainty value in a long-tailed gaussian to account for outliers, furthermore
122 : the data are weighted for the bias applied to other variables of the system.
123 :
124 : \plumedfile
125 : RDC ...
126 : LABEL=rdc
127 : SCALE=0.0001
128 : GYROM=-72.5388
129 : ATOMS1=22,23
130 : ATOMS2=25,27
131 : ATOMS3=29,31
132 : ATOMS4=33,34
133 : ... RDC
134 :
135 : cv1: TORSION ATOMS=1,2,3,4
136 : cv2: TORSION ATOMS=2,3,4,5
137 : mm: METAD ARG=cv1,cv2 HEIGHT=0.5 SIGMA=0.3,0.3 PACE=200 BIASFACTOR=8 WALKERS_MPI
138 :
139 : METAINFERENCE ...
140 : #SETTINGS NREPLICAS=2
141 : ARG=rdc.*,mm.bias
142 : REWEIGHT
143 : NOISETYPE=OUTLIERS
144 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
145 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
146 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
147 : SIGMA_MEAN0=0.001
148 : LABEL=spe
149 : ... METAINFERENCE
150 : \endplumedfile
151 :
152 : (See also \ref RDC, \ref PBMETAD).
153 :
154 : */
155 : //+ENDPLUMEDOC
156 :
/// Bayesian Metainference bias: compares calculated observables (ARG) against
/// experimental references (PARAMETERS/PARARG), optionally replica-averaged,
/// while sampling noise/scale/offset parameters by Monte Carlo.
/// Declaration only; member definitions follow below in this file.
class Metainference : public bias::Bias {
  // experimental reference values (filled from PARAMETERS or PARARG)
  std::vector<double> parameters;
  // functional form of the noise model, one of the enum values below
  unsigned noise_type_;
  enum { GAUSS, MGAUSS, OUTLIERS, MOUTLIERS, GENERIC };
  // likelihood used with NOISETYPE=GENERIC, one of the enum values below
  unsigned gen_likelihood_;
  enum { LIKE_GAUSS, LIKE_LOGN };
  // scale: multiplicative factor applied to the data
  // prior used for the scale MC sampling (SC_GAUSS or SC_FLAT)
  unsigned scale_prior_;
  enum { SC_GAUSS, SC_FLAT };
  bool doscale_;     // true when SCALEDATA is active
  double scale_;     // current value of the scale
  double scale_mu_;  // mean of the gaussian prior on the scale
  double scale_min_; // flat-prior lower bound
  double scale_max_; // flat-prior upper bound
  double Dscale_;    // maximum MC displacement for the scale
  // offset: additive shift applied to the data
  // prior used for the offset MC sampling (reuses SC_GAUSS/SC_FLAT)
  unsigned offset_prior_;
  bool dooffset_;     // true when ADDOFFSET is active
  double offset_;     // current value of the offset
  double offset_mu_;  // mean of the gaussian prior on the offset
  double offset_min_; // flat-prior lower bound
  double offset_max_; // flat-prior upper bound
  double Doffset_;    // maximum MC displacement for the offset
  // scale-only regression with zero intercept (REGRES_ZERO)
  bool doregres_zero_;
  int nregres_zero_; // stride for the regression
  // sigma: data uncertainty parameters sampled by MC
  std::vector<double> sigma_;
  std::vector<double> sigma_min_;
  std::vector<double> sigma_max_;
  std::vector<double> Dsigma_;
  // sigma_mean squared: uncertainty in the mean estimate
  std::vector<double> sigma_mean2_;
  // estimator of the mean value per replica for generic metainference
  std::vector<double> ftilde_;
  double Dftilde_; // fraction of sigma_mean used to evolve ftilde

  // temperature in units of kB*T
  double kbt_;

  // Monte Carlo bookkeeping (one Random per sampled quantity family)
  std::vector<Random> random;
  unsigned MCsteps_;
  long long unsigned MCaccept_;
  long long unsigned MCacceptScale_;
  long long unsigned MCacceptFT_;
  long long unsigned MCtrial_;
  unsigned MCchunksize_; // size of the random subset moved per MC step

  // output components (owned by the Action, not by this class)
  Value* valueScale;
  Value* valueOffset;
  Value* valueAccept;
  Value* valueAcceptScale;
  Value* valueAcceptFT;
  std::vector<Value*> valueSigma;
  std::vector<Value*> valueSigmaMean;
  std::vector<Value*> valueFtilde;

  // restart support: status file written every write_stride_ steps
  unsigned write_stride_;
  OFile sfile_;

  // misc state
  bool firstTime;                // first call since construction/restart
  std::vector<bool> firstTimeW;  // per-selector first-call flags
  bool master;                   // true on the intra-replica communicator rank 0
  bool do_reweight_;             // last ARG is a bias used for reweighting
  unsigned do_optsigmamean_;     // 0=off, 1=SEM, 2=SEM_MAX
  unsigned nrep_;                // number of replicas
  unsigned replica_;             // index of this replica
  unsigned narg;                 // number of data arguments (excludes the bias when reweighting)

  // selector (for multi-state setups, see SELECTOR/NSELECT)
  std::string selector_;

  // history used to optimise sigma_mean: [selector][argument][samples]
  std::vector< std::vector < std::vector <double> > > sigma_mean2_last_;
  unsigned optsigmamean_stride_;
  // sigma max optimisation (OPTSIGMAMEAN=SEM_MAX)
  unsigned N_optimized_step_;
  unsigned optimized_step_;
  bool sigmamax_opt_done_;
  std::vector<double> sigma_max_est_;

  // bias-weight averaging: [selector][replica]
  unsigned average_weights_stride_;
  std::vector< std::vector <double> > average_weights_;

  // energy of the current noise model (one per NOISETYPE variant)
  double getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
                        const double scale, const double offset);
  double getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
                     const double scale, const double offset);
  double getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
                      const double scale, const double offset);
  double getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
                     const double scale, const double offset);
  double getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
                      const double scale, const double offset);
  // individual MC moves, invoked from doMonteCarlo
  void moveTilde(const std::vector<double> &mean_, double &old_energy);
  void moveScaleOffset(const std::vector<double> &mean_, double &old_energy);
  void moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow);
  double doMonteCarlo(const std::vector<double> &mean);
  // energy + forces for each noise model (used in calculate())
  void getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
  void getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
  void getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
  void getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
  void getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
  // replica weights, replica averaging and sigma_mean estimation
  void get_weights(const unsigned iselect, double &weight, double &norm, double &neff);
  void replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b);
  void get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean);
  // dump sampled parameters for restart/continuation
  void writeStatus();
  void do_regression_zero(const std::vector<double> &mean);

public:
  explicit Metainference(const ActionOptions&);
  ~Metainference();
  void calculate() override;
  void update() override;
  static void registerKeywords(Keywords& keys);
};
282 :
283 :
// Register the action so the input parser can instantiate it from the METAINFERENCE keyword.
PLUMED_REGISTER_ACTION(Metainference,"METAINFERENCE")
285 :
286 36 : void Metainference::registerKeywords(Keywords& keys) {
287 36 : Bias::registerKeywords(keys);
288 36 : keys.use("ARG");
289 72 : keys.add("optional","PARARG","reference values for the experimental data, these can be provided as arguments without derivatives");
290 72 : keys.add("optional","PARAMETERS","reference values for the experimental data");
291 72 : keys.addFlag("NOENSEMBLE",false,"don't perform any replica-averaging");
292 72 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the latest ARG as energy");
293 72 : keys.add("optional","AVERAGING", "Stride for calculation of averaged weights and sigma_mean");
294 72 : keys.add("compulsory","NOISETYPE","MGAUSS","functional form of the noise (GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC)");
295 72 : keys.add("compulsory","LIKELIHOOD","GAUSS","the likelihood for the GENERIC metainference model, GAUSS or LOGN");
296 72 : keys.add("compulsory","DFTILDE","0.1","fraction of sigma_mean used to evolve ftilde");
297 72 : keys.addFlag("SCALEDATA",false,"Set to TRUE if you want to sample a scaling factor common to all values and replicas");
298 72 : keys.add("compulsory","SCALE0","1.0","initial value of the scaling factor");
299 72 : keys.add("compulsory","SCALE_PRIOR","FLAT","either FLAT or GAUSSIAN");
300 72 : keys.add("optional","SCALE_MIN","minimum value of the scaling factor");
301 72 : keys.add("optional","SCALE_MAX","maximum value of the scaling factor");
302 72 : keys.add("optional","DSCALE","maximum MC move of the scaling factor");
303 72 : keys.addFlag("ADDOFFSET",false,"Set to TRUE if you want to sample an offset common to all values and replicas");
304 72 : keys.add("compulsory","OFFSET0","0.0","initial value of the offset");
305 72 : keys.add("compulsory","OFFSET_PRIOR","FLAT","either FLAT or GAUSSIAN");
306 72 : keys.add("optional","OFFSET_MIN","minimum value of the offset");
307 72 : keys.add("optional","OFFSET_MAX","maximum value of the offset");
308 72 : keys.add("optional","DOFFSET","maximum MC move of the offset");
309 72 : keys.add("optional","REGRES_ZERO","stride for regression with zero offset");
310 72 : keys.add("compulsory","SIGMA0","1.0","initial value of the uncertainty parameter");
311 72 : keys.add("compulsory","SIGMA_MIN","0.0","minimum value of the uncertainty parameter");
312 72 : keys.add("compulsory","SIGMA_MAX","10.","maximum value of the uncertainty parameter");
313 72 : keys.add("optional","DSIGMA","maximum MC move of the uncertainty parameter");
314 72 : keys.add("compulsory","OPTSIGMAMEAN","NONE","Set to NONE/SEM to manually set sigma mean, or to estimate it on the fly");
315 72 : keys.add("optional","SIGMA_MEAN0","starting value for the uncertainty in the mean estimate");
316 72 : keys.add("optional","SIGMA_MAX_STEPS", "Number of steps used to optimise SIGMA_MAX, before that the SIGMA_MAX value is used");
317 72 : keys.add("optional","TEMP","the system temperature - this is only needed if code doesn't pass the temperature to plumed");
318 72 : keys.add("optional","MC_STEPS","number of MC steps");
319 72 : keys.add("optional","MC_CHUNKSIZE","MC chunksize");
320 72 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart/continuation of Metainference");
321 72 : keys.add("optional","FMT","specify format for STATUS files (useful for decrease the number of digits in regtests)");
322 72 : keys.add("compulsory","WRITE_STRIDE","10000","write the status to a file every N steps, this can be used for restart/continuation");
323 72 : keys.add("optional","SELECTOR","name of selector");
324 72 : keys.add("optional","NSELECT","range of values for selector [0, N-1]");
325 36 : keys.use("RESTART");
326 72 : keys.addOutputComponent("sigma", "default", "uncertainty parameter");
327 72 : keys.addOutputComponent("sigmaMean", "default", "uncertainty in the mean estimate");
328 72 : keys.addOutputComponent("neff", "default", "effective number of replicas");
329 72 : keys.addOutputComponent("acceptSigma", "default", "MC acceptance for sigma values");
330 72 : keys.addOutputComponent("acceptScale", "SCALEDATA", "MC acceptance for scale value");
331 72 : keys.addOutputComponent("acceptFT", "GENERIC", "MC acceptance for general metainference f tilde value");
332 72 : keys.addOutputComponent("weight", "REWEIGHT", "weights of the weighted average");
333 72 : keys.addOutputComponent("biasDer", "REWEIGHT", "derivatives with respect to the bias");
334 72 : keys.addOutputComponent("scale", "SCALEDATA", "scale parameter");
335 72 : keys.addOutputComponent("offset", "ADDOFFSET", "offset parameter");
336 72 : keys.addOutputComponent("ftilde", "GENERIC", "ensemble average estimator");
337 36 : }
338 :
339 32 : Metainference::Metainference(const ActionOptions&ao):
340 : PLUMED_BIAS_INIT(ao),
341 32 : doscale_(false),
342 32 : scale_(1.),
343 32 : scale_mu_(0),
344 32 : scale_min_(1),
345 32 : scale_max_(-1),
346 32 : Dscale_(-1),
347 32 : dooffset_(false),
348 32 : offset_(0.),
349 32 : offset_mu_(0),
350 32 : offset_min_(1),
351 32 : offset_max_(-1),
352 32 : Doffset_(-1),
353 32 : doregres_zero_(false),
354 32 : nregres_zero_(0),
355 32 : Dftilde_(0.1),
356 32 : random(3),
357 32 : MCsteps_(1),
358 32 : MCaccept_(0),
359 32 : MCacceptScale_(0),
360 32 : MCacceptFT_(0),
361 32 : MCtrial_(0),
362 32 : MCchunksize_(0),
363 32 : write_stride_(0),
364 32 : firstTime(true),
365 32 : do_reweight_(false),
366 32 : do_optsigmamean_(0),
367 32 : optsigmamean_stride_(0),
368 32 : N_optimized_step_(0),
369 32 : optimized_step_(0),
370 32 : sigmamax_opt_done_(false),
371 32 : average_weights_stride_(1) {
372 32 : bool noensemble = false;
373 32 : parseFlag("NOENSEMBLE", noensemble);
374 :
375 : // set up replica stuff
376 32 : master = (comm.Get_rank()==0);
377 32 : if(master) {
378 24 : nrep_ = multi_sim_comm.Get_size();
379 24 : replica_ = multi_sim_comm.Get_rank();
380 24 : if(noensemble) {
381 0 : nrep_ = 1;
382 : }
383 : } else {
384 8 : nrep_ = 0;
385 8 : replica_ = 0;
386 : }
387 32 : comm.Sum(&nrep_,1);
388 32 : comm.Sum(&replica_,1);
389 :
390 32 : unsigned nsel = 1;
391 32 : parse("SELECTOR", selector_);
392 64 : parse("NSELECT", nsel);
393 : // do checks
394 32 : if(selector_.length()>0 && nsel<=1) {
395 0 : error("With SELECTOR active, NSELECT must be greater than 1");
396 : }
397 32 : if(selector_.length()==0 && nsel>1) {
398 0 : error("With NSELECT greater than 1, you must specify SELECTOR");
399 : }
400 :
401 : // initialise firstTimeW
402 32 : firstTimeW.resize(nsel, true);
403 :
404 : // reweight implies a different number of arguments (the latest one must always be the bias)
405 32 : parseFlag("REWEIGHT", do_reweight_);
406 32 : if(do_reweight_&&nrep_<2) {
407 0 : error("REWEIGHT can only be used in parallel with 2 or more replicas");
408 : }
409 32 : if(!getRestart()) {
410 56 : average_weights_.resize(nsel, std::vector<double> (nrep_, 1./static_cast<double>(nrep_)));
411 : } else {
412 8 : average_weights_.resize(nsel, std::vector<double> (nrep_, 0.));
413 : }
414 32 : narg = getNumberOfArguments();
415 32 : if(do_reweight_) {
416 16 : narg--;
417 : }
418 :
419 32 : unsigned averaging=0;
420 32 : parse("AVERAGING", averaging);
421 32 : if(averaging>0) {
422 1 : average_weights_stride_ = averaging;
423 1 : optsigmamean_stride_ = averaging;
424 : }
425 :
426 64 : parseVector("PARAMETERS",parameters);
427 32 : if(parameters.size()!=static_cast<unsigned>(narg)&&!parameters.empty()) {
428 0 : error("Size of PARAMETERS array should be either 0 or the same as of the number of arguments in ARG1");
429 : }
430 :
431 : std::vector<Value*> arg2;
432 64 : parseArgumentList("PARARG",arg2);
433 32 : if(!arg2.empty()) {
434 4 : if(parameters.size()>0) {
435 0 : error("It is not possible to use PARARG and PARAMETERS together");
436 : }
437 4 : if(arg2.size()!=narg) {
438 0 : error("Size of PARARG array should be the same as number for arguments in ARG");
439 : }
440 2360 : for(unsigned i=0; i<arg2.size(); i++) {
441 2356 : parameters.push_back(arg2[i]->get());
442 2356 : if(arg2[i]->hasDerivatives()==true) {
443 0 : error("PARARG can only accept arguments without derivatives");
444 : }
445 : }
446 : }
447 :
448 32 : if(parameters.size()!=narg) {
449 0 : error("PARARG or PARAMETERS arrays should include the same number of elements as the arguments in ARG");
450 : }
451 :
452 : std::string stringa_noise;
453 64 : parse("NOISETYPE",stringa_noise);
454 32 : if(stringa_noise=="GAUSS") {
455 3 : noise_type_ = GAUSS;
456 29 : } else if(stringa_noise=="MGAUSS") {
457 8 : noise_type_ = MGAUSS;
458 21 : } else if(stringa_noise=="OUTLIERS") {
459 14 : noise_type_ = OUTLIERS;
460 7 : } else if(stringa_noise=="MOUTLIERS") {
461 4 : noise_type_ = MOUTLIERS;
462 3 : } else if(stringa_noise=="GENERIC") {
463 3 : noise_type_ = GENERIC;
464 : } else {
465 0 : error("Unknown noise type!");
466 : }
467 :
468 32 : if(noise_type_== GENERIC) {
469 : std::string stringa_like;
470 6 : parse("LIKELIHOOD",stringa_like);
471 3 : if(stringa_like=="GAUSS") {
472 2 : gen_likelihood_ = LIKE_GAUSS;
473 1 : } else if(stringa_like=="LOGN") {
474 1 : gen_likelihood_ = LIKE_LOGN;
475 : } else {
476 0 : error("Unknown likelihood type!");
477 : }
478 :
479 6 : parse("DFTILDE",Dftilde_);
480 : }
481 :
482 64 : parse("WRITE_STRIDE",write_stride_);
483 : std::string status_file_name_;
484 64 : parse("STATUS_FILE",status_file_name_);
485 32 : if(status_file_name_=="") {
486 64 : status_file_name_ = "MISTATUS"+getLabel();
487 : } else {
488 0 : status_file_name_ = status_file_name_+getLabel();
489 : }
490 :
491 : std::string stringa_optsigma;
492 64 : parse("OPTSIGMAMEAN", stringa_optsigma);
493 32 : if(stringa_optsigma=="NONE") {
494 30 : do_optsigmamean_=0;
495 2 : } else if(stringa_optsigma=="SEM") {
496 0 : do_optsigmamean_=1;
497 2 : } else if(stringa_optsigma=="SEM_MAX") {
498 0 : do_optsigmamean_=2;
499 : }
500 :
501 32 : unsigned aver_max_steps=0;
502 32 : parse("SIGMA_MAX_STEPS", aver_max_steps);
503 32 : if(aver_max_steps==0&&do_optsigmamean_==2) {
504 0 : aver_max_steps=averaging*2000;
505 : }
506 32 : if(aver_max_steps>0&&do_optsigmamean_<2) {
507 0 : error("SIGMA_MAX_STEPS can only be used together with OPTSIGMAMEAN=SEM_MAX");
508 : }
509 32 : if(aver_max_steps>0&&do_optsigmamean_==2) {
510 0 : N_optimized_step_=aver_max_steps;
511 : }
512 32 : if(aver_max_steps>0&&aver_max_steps<averaging) {
513 0 : error("SIGMA_MAX_STEPS must be greater than AVERAGING");
514 : }
515 :
516 : // resize std::vector for sigma_mean history
517 32 : sigma_mean2_last_.resize(nsel);
518 64 : for(unsigned i=0; i<nsel; i++) {
519 32 : sigma_mean2_last_[i].resize(narg);
520 : }
521 :
522 : std::vector<double> read_sigma_mean_;
523 32 : parseVector("SIGMA_MEAN0",read_sigma_mean_);
524 32 : if(do_optsigmamean_==0 && read_sigma_mean_.size()==0 && !getRestart()) {
525 0 : error("If you don't use OPTSIGMAMEAN and you are not RESTARTING then you MUST SET SIGMA_MEAN0");
526 : }
527 :
528 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
529 15 : if(read_sigma_mean_.size()==narg) {
530 0 : sigma_mean2_.resize(narg);
531 0 : for(unsigned i=0; i<narg; i++) {
532 0 : sigma_mean2_[i]=read_sigma_mean_[i]*read_sigma_mean_[i];
533 : }
534 15 : } else if(read_sigma_mean_.size()==1) {
535 15 : sigma_mean2_.resize(narg,read_sigma_mean_[0]*read_sigma_mean_[0]);
536 0 : } else if(read_sigma_mean_.size()==0) {
537 0 : sigma_mean2_.resize(narg,0.000001);
538 : } else {
539 0 : error("SIGMA_MEAN0 can accept either one single value or as many values as the arguments (with NOISETYPE=MGAUSS|MOUTLIERS)");
540 : }
541 : // set the initial value for the history
542 30 : for(unsigned i=0; i<nsel; i++)
543 2409 : for(unsigned j=0; j<narg; j++) {
544 2394 : sigma_mean2_last_[i][j].push_back(sigma_mean2_[j]);
545 : }
546 : } else {
547 17 : if(read_sigma_mean_.size()==1) {
548 17 : sigma_mean2_.resize(1, read_sigma_mean_[0]*read_sigma_mean_[0]);
549 0 : } else if(read_sigma_mean_.size()==0) {
550 0 : sigma_mean2_.resize(1, 0.000001);
551 : } else {
552 0 : error("If you want to use more than one SIGMA_MEAN0 you should use NOISETYPE=MGAUSS|MOUTLIERS");
553 : }
554 : // set the initial value for the history
555 34 : for(unsigned i=0; i<nsel; i++)
556 109 : for(unsigned j=0; j<narg; j++) {
557 92 : sigma_mean2_last_[i][j].push_back(sigma_mean2_[0]);
558 : }
559 : }
560 :
561 32 : parseFlag("SCALEDATA", doscale_);
562 32 : if(doscale_) {
563 : std::string stringa_noise;
564 24 : parse("SCALE_PRIOR",stringa_noise);
565 12 : if(stringa_noise=="GAUSSIAN") {
566 0 : scale_prior_ = SC_GAUSS;
567 12 : } else if(stringa_noise=="FLAT") {
568 12 : scale_prior_ = SC_FLAT;
569 : } else {
570 0 : error("Unknown SCALE_PRIOR type!");
571 : }
572 12 : parse("SCALE0",scale_);
573 12 : parse("DSCALE",Dscale_);
574 12 : if(Dscale_<0.) {
575 0 : error("DSCALE must be set when using SCALEDATA");
576 : }
577 12 : if(scale_prior_==SC_GAUSS) {
578 0 : scale_mu_=scale_;
579 : } else {
580 12 : parse("SCALE_MIN",scale_min_);
581 12 : parse("SCALE_MAX",scale_max_);
582 12 : if(scale_max_<scale_min_) {
583 0 : error("SCALE_MAX and SCALE_MIN must be set when using SCALE_PRIOR=FLAT");
584 : }
585 : }
586 : }
587 :
588 32 : parseFlag("ADDOFFSET", dooffset_);
589 32 : if(dooffset_) {
590 : std::string stringa_noise;
591 12 : parse("OFFSET_PRIOR",stringa_noise);
592 6 : if(stringa_noise=="GAUSSIAN") {
593 0 : offset_prior_ = SC_GAUSS;
594 6 : } else if(stringa_noise=="FLAT") {
595 6 : offset_prior_ = SC_FLAT;
596 : } else {
597 0 : error("Unknown OFFSET_PRIOR type!");
598 : }
599 6 : parse("OFFSET0",offset_);
600 6 : parse("DOFFSET",Doffset_);
601 6 : if(offset_prior_==SC_GAUSS) {
602 0 : offset_mu_=offset_;
603 0 : if(Doffset_<0.) {
604 0 : error("DOFFSET must be set when using OFFSET_PRIOR=GAUSS");
605 : }
606 : } else {
607 6 : parse("OFFSET_MIN",offset_min_);
608 6 : parse("OFFSET_MAX",offset_max_);
609 6 : if(Doffset_<0) {
610 0 : Doffset_ = 0.05*(offset_max_ - offset_min_);
611 : }
612 6 : if(offset_max_<offset_min_) {
613 0 : error("OFFSET_MAX and OFFSET_MIN must be set when using OFFSET_PRIOR=FLAT");
614 : }
615 : }
616 : }
617 :
618 : // regression with zero intercept
619 32 : parse("REGRES_ZERO", nregres_zero_);
620 32 : if(nregres_zero_>0) {
621 : // set flag
622 1 : doregres_zero_=true;
623 : // check if already sampling scale and offset
624 1 : if(doscale_) {
625 0 : error("REGRES_ZERO and SCALEDATA are mutually exclusive");
626 : }
627 1 : if(dooffset_) {
628 0 : error("REGRES_ZERO and ADDOFFSET are mutually exclusive");
629 : }
630 : }
631 :
632 : std::vector<double> readsigma;
633 32 : parseVector("SIGMA0",readsigma);
634 32 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma.size()>1) {
635 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
636 : }
637 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
638 15 : sigma_.resize(readsigma.size());
639 15 : sigma_=readsigma;
640 : } else {
641 17 : sigma_.resize(1, readsigma[0]);
642 : }
643 :
644 : std::vector<double> readsigma_min;
645 32 : parseVector("SIGMA_MIN",readsigma_min);
646 32 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_min.size()>1) {
647 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
648 : }
649 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
650 15 : sigma_min_.resize(readsigma_min.size());
651 15 : sigma_min_=readsigma_min;
652 : } else {
653 17 : sigma_min_.resize(1, readsigma_min[0]);
654 : }
655 :
656 : std::vector<double> readsigma_max;
657 32 : parseVector("SIGMA_MAX",readsigma_max);
658 32 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1) {
659 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
660 : }
661 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
662 15 : sigma_max_.resize(readsigma_max.size());
663 15 : sigma_max_=readsigma_max;
664 : } else {
665 17 : sigma_max_.resize(1, readsigma_max[0]);
666 : }
667 :
668 32 : if(sigma_max_.size()!=sigma_min_.size()) {
669 0 : error("The number of values for SIGMA_MIN and SIGMA_MAX must be the same");
670 : }
671 :
672 : std::vector<double> read_dsigma;
673 32 : parseVector("DSIGMA",read_dsigma);
674 32 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1) {
675 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
676 : }
677 32 : if(read_dsigma.size()>0) {
678 32 : Dsigma_.resize(read_dsigma.size());
679 32 : Dsigma_=read_dsigma;
680 : } else {
681 0 : Dsigma_.resize(sigma_max_.size(), -1.);
682 : /* in this case Dsigma is initialised after reading the restart file if present */
683 : }
684 :
685 : std::string fmt_;
686 32 : parse("FMT",fmt_);
687 :
688 : // monte carlo stuff
689 32 : parse("MC_STEPS",MCsteps_);
690 32 : parse("MC_CHUNKSIZE", MCchunksize_);
691 : // get temperature
692 32 : double temp=0.0;
693 32 : parse("TEMP",temp);
694 32 : if(temp>0.0) {
695 32 : kbt_=plumed.getAtoms().getKBoltzmann()*temp;
696 : } else {
697 0 : kbt_=plumed.getAtoms().getKbT();
698 : }
699 32 : if(kbt_==0.0) {
700 0 : error("Unless the MD engine passes the temperature to plumed, you must specify it using TEMP");
701 : }
702 :
703 32 : checkRead();
704 :
705 : // set sigma_bias
706 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
707 15 : if(sigma_.size()==1) {
708 15 : double tmp = sigma_[0];
709 15 : sigma_.resize(narg, tmp);
710 0 : } else if(sigma_.size()>1&&sigma_.size()!=narg) {
711 0 : error("SIGMA0 can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
712 : }
713 15 : if(sigma_min_.size()==1) {
714 15 : double tmp = sigma_min_[0];
715 15 : sigma_min_.resize(narg, tmp);
716 0 : } else if(sigma_min_.size()>1&&sigma_min_.size()!=narg) {
717 0 : error("SIGMA_MIN can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
718 : }
719 15 : if(sigma_max_.size()==1) {
720 15 : double tmp = sigma_max_[0];
721 15 : sigma_max_.resize(narg, tmp);
722 0 : } else if(sigma_max_.size()>1&&sigma_max_.size()!=narg) {
723 0 : error("SIGMA_MAX can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
724 : }
725 15 : if(Dsigma_.size()==1) {
726 15 : double tmp = Dsigma_[0];
727 15 : Dsigma_.resize(narg, tmp);
728 0 : } else if(Dsigma_.size()>1&&Dsigma_.size()!=narg) {
729 0 : error("DSIGMA can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
730 : }
731 : }
732 :
733 32 : sigma_max_est_.resize(sigma_max_.size(), 0.);
734 :
735 32 : IFile restart_sfile;
736 32 : restart_sfile.link(*this);
737 32 : if(getRestart()&&restart_sfile.FileExist(status_file_name_)) {
738 4 : firstTime = false;
739 8 : for(unsigned i=0; i<nsel; i++) {
740 : firstTimeW[i] = false;
741 : }
742 4 : restart_sfile.open(status_file_name_);
743 4 : log.printf(" Restarting from %s\n", status_file_name_.c_str());
744 : double dummy;
745 8 : if(restart_sfile.scanField("time",dummy)) {
746 : // check for syncronisation
747 4 : std::vector<double> dummy_time(nrep_,0);
748 4 : if(master&&nrep_>1) {
749 2 : dummy_time[replica_] = dummy;
750 2 : multi_sim_comm.Sum(dummy_time);
751 : }
752 4 : comm.Sum(dummy_time);
753 8 : for(unsigned i=1; i<nrep_; i++) {
754 4 : std::string msg = "METAINFERENCE restart files " + status_file_name_ + " are not in sync";
755 4 : if(dummy_time[i]!=dummy_time[0]) {
756 0 : plumed_merror(msg);
757 : }
758 : }
759 : // nsel
760 8 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
761 : std::string msg_i;
762 4 : Tools::convert(i,msg_i);
763 : // narg
764 4 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
765 20 : for(unsigned j=0; j<narg; ++j) {
766 : std::string msg_j;
767 16 : Tools::convert(j,msg_j);
768 16 : std::string msg = msg_i+"_"+msg_j;
769 : double read_sm;
770 16 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
771 16 : sigma_mean2_last_[i][j][0] = read_sm*read_sm;
772 : }
773 : }
774 4 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
775 : double read_sm;
776 : std::string msg_j;
777 0 : Tools::convert(0,msg_j);
778 0 : std::string msg = msg_i+"_"+msg_j;
779 0 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
780 0 : for(unsigned j=0; j<narg; j++) {
781 0 : sigma_mean2_last_[i][j][0] = read_sm*read_sm;
782 : }
783 : }
784 : }
785 :
786 20 : for(unsigned i=0; i<sigma_.size(); ++i) {
787 : std::string msg;
788 16 : Tools::convert(i,msg);
789 32 : restart_sfile.scanField("sigma_"+msg,sigma_[i]);
790 : }
791 20 : for(unsigned i=0; i<sigma_max_.size(); ++i) {
792 : std::string msg;
793 16 : Tools::convert(i,msg);
794 16 : restart_sfile.scanField("sigma_max_"+msg,sigma_max_[i]);
795 16 : sigmamax_opt_done_=true;
796 : }
797 4 : if(noise_type_==GENERIC) {
798 0 : for(unsigned i=0; i<ftilde_.size(); ++i) {
799 : std::string msg;
800 0 : Tools::convert(i,msg);
801 0 : restart_sfile.scanField("ftilde_"+msg,ftilde_[i]);
802 : }
803 : }
804 4 : restart_sfile.scanField("scale0_",scale_);
805 4 : restart_sfile.scanField("offset0_",offset_);
806 :
807 8 : for(unsigned i=0; i<nsel; i++) {
808 : std::string msg;
809 4 : Tools::convert(i,msg);
810 : double tmp_w;
811 4 : restart_sfile.scanField("weight_"+msg,tmp_w);
812 4 : if(master) {
813 2 : average_weights_[i][replica_] = tmp_w;
814 2 : if(nrep_>1) {
815 2 : multi_sim_comm.Sum(&average_weights_[i][0], nrep_);
816 : }
817 : }
818 4 : comm.Sum(&average_weights_[i][0], nrep_);
819 : }
820 :
821 : }
822 4 : restart_sfile.scanField();
823 4 : restart_sfile.close();
824 : }
825 :
826 : /* If DSIGMA is not yet initialised do it now */
827 2443 : for(unsigned i=0; i<sigma_max_.size(); i++)
828 2411 : if(Dsigma_[i]==-1) {
829 0 : Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
830 : }
831 :
832 32 : switch(noise_type_) {
833 3 : case GENERIC:
834 3 : log.printf(" with general metainference ");
835 3 : if(gen_likelihood_==LIKE_GAUSS) {
836 2 : log.printf(" and a gaussian likelihood\n");
837 1 : } else if(gen_likelihood_==LIKE_LOGN) {
838 1 : log.printf(" and a log-normal likelihood\n");
839 : }
840 3 : log.printf(" ensemble average parameter sampled with a step %lf of sigma_mean\n", Dftilde_);
841 : break;
842 3 : case GAUSS:
843 3 : log.printf(" with gaussian noise and a single noise parameter for all the data\n");
844 : break;
845 8 : case MGAUSS:
846 8 : log.printf(" with gaussian noise and a noise parameter for each data point\n");
847 : break;
848 14 : case OUTLIERS:
849 14 : log.printf(" with long tailed gaussian noise and a single noise parameter for all the data\n");
850 : break;
851 4 : case MOUTLIERS:
852 4 : log.printf(" with long tailed gaussian noise and a noise parameter for each data point\n");
853 : break;
854 : }
855 :
856 32 : if(doscale_) {
857 : // check that the scale value is the same for all replicas
858 12 : std::vector<double> dummy_scale(nrep_,0);
859 12 : if(master&&nrep_>1) {
860 6 : dummy_scale[replica_] = scale_;
861 6 : multi_sim_comm.Sum(dummy_scale);
862 : }
863 12 : comm.Sum(dummy_scale);
864 24 : for(unsigned i=1; i<nrep_; i++) {
865 12 : std::string msg = "The SCALE value must be the same for all replicas: check your input or restart file";
866 12 : if(dummy_scale[i]!=dummy_scale[0]) {
867 0 : plumed_merror(msg);
868 : }
869 : }
870 12 : log.printf(" sampling a common scaling factor with:\n");
871 12 : log.printf(" initial scale parameter %f\n",scale_);
872 12 : if(scale_prior_==SC_GAUSS) {
873 0 : log.printf(" gaussian prior with mean %f and width %f\n",scale_mu_,Dscale_);
874 : }
875 12 : if(scale_prior_==SC_FLAT) {
876 12 : log.printf(" flat prior between %f - %f\n",scale_min_,scale_max_);
877 12 : log.printf(" maximum MC move of scale parameter %f\n",Dscale_);
878 : }
879 : }
880 :
881 32 : if(dooffset_) {
882 : // check that the offset value is the same for all replicas
883 6 : std::vector<double> dummy_offset(nrep_,0);
884 6 : if(master&&nrep_>1) {
885 0 : dummy_offset[replica_] = offset_;
886 0 : multi_sim_comm.Sum(dummy_offset);
887 : }
888 6 : comm.Sum(dummy_offset);
889 6 : for(unsigned i=1; i<nrep_; i++) {
890 0 : std::string msg = "The OFFSET value must be the same for all replicas: check your input or restart file";
891 0 : if(dummy_offset[i]!=dummy_offset[0]) {
892 0 : plumed_merror(msg);
893 : }
894 : }
895 6 : log.printf(" sampling a common offset with:\n");
896 6 : log.printf(" initial offset parameter %f\n",offset_);
897 6 : if(offset_prior_==SC_GAUSS) {
898 0 : log.printf(" gaussian prior with mean %f and width %f\n",offset_mu_,Doffset_);
899 : }
900 6 : if(offset_prior_==SC_FLAT) {
901 6 : log.printf(" flat prior between %f - %f\n",offset_min_,offset_max_);
902 6 : log.printf(" maximum MC move of offset parameter %f\n",Doffset_);
903 : }
904 : }
905 :
906 32 : if(doregres_zero_) {
907 1 : log.printf(" doing regression with zero intercept with stride: %d\n", nregres_zero_);
908 : }
909 :
910 32 : log.printf(" number of experimental data points %u\n",narg);
911 32 : log.printf(" number of replicas %u\n",nrep_);
912 32 : log.printf(" initial data uncertainties");
913 2443 : for(unsigned i=0; i<sigma_.size(); ++i) {
914 2411 : log.printf(" %f", sigma_[i]);
915 : }
916 32 : log.printf("\n");
917 32 : log.printf(" minimum data uncertainties");
918 2443 : for(unsigned i=0; i<sigma_.size(); ++i) {
919 2411 : log.printf(" %f",sigma_min_[i]);
920 : }
921 32 : log.printf("\n");
922 32 : log.printf(" maximum data uncertainties");
923 2443 : for(unsigned i=0; i<sigma_.size(); ++i) {
924 2411 : log.printf(" %f",sigma_max_[i]);
925 : }
926 32 : log.printf("\n");
927 32 : log.printf(" maximum MC move of data uncertainties");
928 2443 : for(unsigned i=0; i<sigma_.size(); ++i) {
929 2411 : log.printf(" %f",Dsigma_[i]);
930 : }
931 32 : log.printf("\n");
932 32 : log.printf(" temperature of the system %f\n",kbt_);
933 32 : log.printf(" MC steps %u\n",MCsteps_);
934 32 : log.printf(" initial standard errors of the mean");
935 2443 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) {
936 2411 : log.printf(" %f", std::sqrt(sigma_mean2_[i]));
937 : }
938 32 : log.printf("\n");
939 :
940 32 : if(do_reweight_) {
941 16 : addComponent("biasDer");
942 16 : componentIsNotPeriodic("biasDer");
943 16 : addComponent("weight");
944 32 : componentIsNotPeriodic("weight");
945 : }
946 :
947 32 : addComponent("neff");
948 32 : componentIsNotPeriodic("neff");
949 :
950 32 : if(doscale_ || doregres_zero_) {
951 13 : addComponent("scale");
952 13 : componentIsNotPeriodic("scale");
953 13 : valueScale=getPntrToComponent("scale");
954 : }
955 :
956 32 : if(dooffset_) {
957 6 : addComponent("offset");
958 6 : componentIsNotPeriodic("offset");
959 6 : valueOffset=getPntrToComponent("offset");
960 : }
961 :
962 32 : if(dooffset_||doscale_) {
963 18 : addComponent("acceptScale");
964 18 : componentIsNotPeriodic("acceptScale");
965 18 : valueAcceptScale=getPntrToComponent("acceptScale");
966 : }
967 :
968 32 : if(noise_type_==GENERIC) {
969 3 : addComponent("acceptFT");
970 3 : componentIsNotPeriodic("acceptFT");
971 3 : valueAcceptFT=getPntrToComponent("acceptFT");
972 : }
973 :
974 32 : addComponent("acceptSigma");
975 32 : componentIsNotPeriodic("acceptSigma");
976 32 : valueAccept=getPntrToComponent("acceptSigma");
977 :
978 32 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
979 2409 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) {
980 : std::string num;
981 2394 : Tools::convert(i,num);
982 2394 : addComponent("sigmaMean-"+num);
983 2394 : componentIsNotPeriodic("sigmaMean-"+num);
984 2394 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean-"+num));
985 2394 : getPntrToComponent("sigmaMean-"+num)->set(std::sqrt(sigma_mean2_[i]));
986 2394 : addComponent("sigma-"+num);
987 2394 : componentIsNotPeriodic("sigma-"+num);
988 2394 : valueSigma.push_back(getPntrToComponent("sigma-"+num));
989 2394 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
990 2394 : if(noise_type_==GENERIC) {
991 6 : addComponent("ftilde-"+num);
992 6 : componentIsNotPeriodic("ftilde-"+num);
993 6 : valueFtilde.push_back(getPntrToComponent("ftilde-"+num));
994 : }
995 : }
996 15 : } else {
997 17 : addComponent("sigmaMean");
998 17 : componentIsNotPeriodic("sigmaMean");
999 17 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean"));
1000 17 : getPntrToComponent("sigmaMean")->set(std::sqrt(sigma_mean2_[0]));
1001 17 : addComponent("sigma");
1002 17 : componentIsNotPeriodic("sigma");
1003 17 : valueSigma.push_back(getPntrToComponent("sigma"));
1004 34 : getPntrToComponent("sigma")->set(sigma_[0]);
1005 : }
1006 :
1007 : // initialize random seed
1008 : unsigned iseed;
1009 32 : if(master) {
1010 24 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
1011 24 : iseed = static_cast<unsigned>(ts)+replica_;
1012 : } else {
1013 8 : iseed = 0;
1014 : }
1015 32 : comm.Sum(&iseed, 1);
1016 : // this is used for ftilde and sigma both the move and the acceptance
1017 : // this is different for each replica
1018 32 : random[0].setSeed(-iseed);
1019 32 : if(doscale_||dooffset_) {
1020 : // in this case we want the same seed everywhere
1021 18 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
1022 18 : iseed = static_cast<unsigned>(ts);
1023 18 : if(master&&nrep_>1) {
1024 6 : multi_sim_comm.Bcast(iseed,0);
1025 : }
1026 18 : comm.Bcast(iseed,0);
1027 : // this is used for scale and offset sampling and acceptance
1028 18 : random[1].setSeed(-iseed);
1029 : }
1030 : // this is used for random chunk of sigmas, and it is different for each replica
1031 32 : if(master) {
1032 24 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
1033 24 : iseed = static_cast<unsigned>(ts)+replica_;
1034 : } else {
1035 8 : iseed = 0;
1036 : }
1037 32 : comm.Sum(&iseed, 1);
1038 32 : random[2].setSeed(-iseed);
1039 :
1040 : // outfile stuff
1041 32 : if(write_stride_>0) {
1042 32 : sfile_.link(*this);
1043 32 : sfile_.open(status_file_name_);
1044 32 : if(fmt_.length()>0) {
1045 1 : sfile_.fmtField(fmt_);
1046 : }
1047 : }
1048 :
1049 64 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
1050 32 : if(do_reweight_) {
1051 32 : log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
1052 : }
1053 32 : if(do_optsigmamean_>0) {
1054 0 : log<<plumed.cite("Loehr, Jussupow, Camilloni, J. Chem. Phys. 146, 165102 (2017)");
1055 : }
1056 64 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
1057 32 : log<<"\n";
1058 64 : }
1059 :
// Destructor: close the status file if it was opened in the constructor
// (it is opened only when WRITE_STRIDE>0, so check before closing).
Metainference::~Metainference() {
  if(sfile_.isOpen()) {
    sfile_.close();
  }
}
1065 :
1066 264 : double Metainference::getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
1067 : const double scale, const double offset) {
1068 264 : const double scale2 = scale*scale;
1069 264 : const double sm2 = sigma_mean2_[0];
1070 264 : const double ss2 = sigma[0]*sigma[0] + scale2*sm2;
1071 264 : const double sss = sigma[0]*sigma[0] + sm2;
1072 :
1073 : double ene = 0.0;
1074 264 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1075 : {
1076 : #pragma omp for reduction( + : ene)
1077 : for(unsigned i=0; i<narg; ++i) {
1078 : const double dev = scale*mean[i]-parameters[i]+offset;
1079 : const double a2 = 0.5*dev*dev + ss2;
1080 : if(sm2 > 0.0) {
1081 : ene += std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
1082 : } else {
1083 : ene += std::log(2.0*a2);
1084 : }
1085 : }
1086 : }
1087 : // add one single Jeffrey's prior and one normalisation per data point
1088 264 : ene += 0.5*std::log(sss) + static_cast<double>(narg)*0.5*std::log(0.5*M_PI*M_PI/ss2);
1089 264 : if(doscale_ || doregres_zero_) {
1090 156 : ene += 0.5*std::log(sss);
1091 : }
1092 264 : if(dooffset_) {
1093 0 : ene += 0.5*std::log(sss);
1094 : }
1095 264 : return kbt_ * ene;
1096 : }
1097 :
// Metainference energy for NOISETYPE=MOUTLIERS: long-tailed gaussian noise with
// one noise parameter per data point.
// mean   : averaged calculated data, one value per argument
// sigma  : per-data-point noise parameters
// scale  : scaling factor applied to the calculated data
// offset : constant offset applied to the calculated data
// Returns the energy in units of kbt_.
double Metainference::getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
                                   const double scale, const double offset) {
  const double scale2 = scale*scale;
  double ene = 0.0;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
  {
    #pragma omp for reduction( + : ene)
    for(unsigned i=0; i<narg; ++i) {
      // per-point variances: ss2 enters the likelihood, sss the priors
      const double sm2 = sigma_mean2_[i];
      const double ss2 = sigma[i]*sigma[i] + scale2*sm2;
      const double sss = sigma[i]*sigma[i] + sm2;
      const double dev = scale*mean[i]-parameters[i]+offset;
      const double a2 = 0.5*dev*dev + ss2;
      // likelihood + normalisation + Jeffrey's prior, one set per data point;
      // the sm2 > 0 branch uses the marginalised long-tailed form
      if(sm2 > 0.0) {
        ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
      } else {
        ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2);
      }
      // extra Jeffrey's terms when scale and/or offset are sampled as well
      if(doscale_ || doregres_zero_) {
        ene += 0.5*std::log(sss);
      }
      if(dooffset_) {
        ene += 0.5*std::log(sss);
      }
    }
  }
  return kbt_ * ene;
}
1126 :
// Metainference energy for NOISETYPE=GENERIC, where the ensemble averages
// ftilde are sampled explicitly (moveTilde) rather than marginalised.
// The data likelihood is gaussian or log-normal depending on gen_likelihood_.
// mean   : averaged calculated data; ftilde: sampled ensemble averages
// sigma  : per-data-point noise parameters; scale/offset: data mapping
// Returns the energy in units of kbt_.
double Metainference::getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
                                     const double scale, const double offset) {
  double ene = 0.0;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
  {
    #pragma omp for reduction( + : ene)
    for(unsigned i=0; i<narg; ++i) {
      const double inv_sb2 = 1./(sigma[i]*sigma[i]);
      const double inv_sm2 = 1./sigma_mean2_[i];
      // deviation of ftilde from the experimental datum, per likelihood type
      double devb = 0;
      if(gen_likelihood_==LIKE_GAUSS) {
        devb = scale*ftilde[i]-parameters[i]+offset;
      } else if(gen_likelihood_==LIKE_LOGN) {
        devb = std::log(scale*ftilde[i]/parameters[i]);
      }
      // deviation of the calculated average from ftilde
      double devm = mean[i] - ftilde[i];
      // deviation + normalisation + jeffrey
      double normb = 0.;
      if(gen_likelihood_==LIKE_GAUSS) {
        normb = -0.5*std::log(0.5/M_PI*inv_sb2);
      } else if(gen_likelihood_==LIKE_LOGN) {
        normb = -0.5*std::log(0.5/M_PI*inv_sb2/(parameters[i]*parameters[i]));
      }
      const double normm = -0.5*std::log(0.5/M_PI*inv_sm2);
      const double jeffreys = -0.5*std::log(2.*inv_sb2);
      ene += 0.5*devb*devb*inv_sb2 + 0.5*devm*devm*inv_sm2 + normb + normm + jeffreys;
      // extra Jeffrey's terms when scale and/or offset are sampled as well
      if(doscale_ || doregres_zero_) {
        ene += jeffreys;
      }
      if(dooffset_) {
        ene += jeffreys;
      }
    }
  }
  return kbt_ * ene;
}
1163 :
1164 108 : double Metainference::getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
1165 : const double scale, const double offset) {
1166 108 : const double scale2 = scale*scale;
1167 108 : const double inv_s2 = 1./(sigma[0]*sigma[0] + scale2*sigma_mean2_[0]);
1168 108 : const double inv_sss = 1./(sigma[0]*sigma[0] + sigma_mean2_[0]);
1169 :
1170 : double ene = 0.0;
1171 108 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1172 : {
1173 : #pragma omp for reduction( + : ene)
1174 : for(unsigned i=0; i<narg; ++i) {
1175 : double dev = scale*mean[i]-parameters[i]+offset;
1176 : ene += 0.5*dev*dev*inv_s2;
1177 : }
1178 : }
1179 108 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
1180 108 : const double jeffreys = -0.5*std::log(2.*inv_sss);
1181 : // add Jeffrey's prior in case one sigma for all data points + one normalisation per datapoint
1182 108 : ene += jeffreys + static_cast<double>(narg)*normalisation;
1183 108 : if(doscale_ || doregres_zero_) {
1184 0 : ene += jeffreys;
1185 : }
1186 108 : if(dooffset_) {
1187 108 : ene += jeffreys;
1188 : }
1189 :
1190 108 : return kbt_ * ene;
1191 : }
1192 :
// Metainference energy for NOISETYPE=MGAUSS: gaussian noise with one noise
// parameter per data point.
// mean   : averaged calculated data; sigma: per-data-point noise parameters
// scale/offset map the calculated data onto the experimental values.
// Returns the energy in units of kbt_.
double Metainference::getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
                                   const double scale, const double offset) {
  const double scale2 = scale*scale;

  double ene = 0.0;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
  {
    #pragma omp for reduction( + : ene)
    for(unsigned i=0; i<narg; ++i) {
      // per-point inverse variances for the likelihood and for the prior
      const double inv_s2 = 1./(sigma[i]*sigma[i] + scale2*sigma_mean2_[i]);
      const double inv_sss = 1./(sigma[i]*sigma[i] + sigma_mean2_[i]);
      double dev = scale*mean[i]-parameters[i]+offset;
      // deviation + normalisation + jeffrey
      const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
      const double jeffreys = -0.5*std::log(2.*inv_sss);
      ene += 0.5*dev*dev*inv_s2 + normalisation + jeffreys;
      // extra Jeffrey's terms when scale and/or offset are sampled as well
      if(doscale_ || doregres_zero_) {
        ene += jeffreys;
      }
      if(dooffset_) {
        ene += jeffreys;
      }
    }
  }
  return kbt_ * ene;
}
1219 :
1220 36 : void Metainference::moveTilde(const std::vector<double> &mean_, double &old_energy) {
1221 36 : std::vector<double> new_ftilde(sigma_.size());
1222 36 : new_ftilde = ftilde_;
1223 :
1224 : // change all tildes
1225 108 : for(unsigned j=0; j<sigma_.size(); j++) {
1226 72 : const double r3 = random[0].Gaussian();
1227 72 : const double ds3 = Dftilde_*std::sqrt(sigma_mean2_[j])*r3;
1228 72 : new_ftilde[j] = ftilde_[j] + ds3;
1229 : }
1230 : // calculate new energy
1231 36 : double new_energy = getEnergyMIGEN(mean_,new_ftilde,sigma_,scale_,offset_);
1232 :
1233 : // accept or reject
1234 36 : const double delta = ( new_energy - old_energy ) / kbt_;
1235 : // if delta is negative always accept move
1236 36 : if( delta <= 0.0 ) {
1237 36 : old_energy = new_energy;
1238 36 : ftilde_ = new_ftilde;
1239 36 : MCacceptFT_++;
1240 : // otherwise extract random number
1241 : } else {
1242 0 : const double s = random[0].RandU01();
1243 0 : if( s < std::exp(-delta) ) {
1244 0 : old_energy = new_energy;
1245 0 : ftilde_ = new_ftilde;
1246 0 : MCacceptFT_++;
1247 : }
1248 : }
1249 36 : }
1250 :
// Monte-Carlo move for the scale and/or offset parameters.
// The proposal uses random[1] (seeded identically on every replica) and the
// acceptance is based on the energies summed over all replicas, so that every
// replica takes the same decision and scale_/offset_ stay in sync.
// old_energy is updated in place when the move is accepted.
void Metainference::moveScaleOffset(const std::vector<double> &mean_, double &old_energy) {
  double new_scale = scale_;

  if(doscale_) {
    if(scale_prior_==SC_FLAT) {
      // flat prior: plain gaussian step, reflected at the boundaries
      const double r1 = random[1].Gaussian();
      const double ds1 = Dscale_*r1;
      new_scale += ds1;
      // check boundaries
      if(new_scale > scale_max_) {
        new_scale = 2.0 * scale_max_ - new_scale;
      }
      if(new_scale < scale_min_) {
        new_scale = 2.0 * scale_min_ - new_scale;
      }
    } else {
      // gaussian prior: drift towards scale_mu_ plus a gaussian step
      const double r1 = random[1].Gaussian();
      const double ds1 = 0.5*(scale_mu_-new_scale)+Dscale_*std::exp(1)/M_PI*r1;
      new_scale += ds1;
    }
  }

  double new_offset = offset_;

  if(dooffset_) {
    if(offset_prior_==SC_FLAT) {
      // flat prior: plain gaussian step, reflected at the boundaries
      const double r1 = random[1].Gaussian();
      const double ds1 = Doffset_*r1;
      new_offset += ds1;
      // check boundaries
      if(new_offset > offset_max_) {
        new_offset = 2.0 * offset_max_ - new_offset;
      }
      if(new_offset < offset_min_) {
        new_offset = 2.0 * offset_min_ - new_offset;
      }
    } else {
      // gaussian prior: drift towards offset_mu_ plus a gaussian step
      const double r1 = random[1].Gaussian();
      const double ds1 = 0.5*(offset_mu_-new_offset)+Doffset_*std::exp(1)/M_PI*r1;
      new_offset += ds1;
    }
  }

  // calculate new energy
  double new_energy = 0.;

  switch(noise_type_) {
  case GAUSS:
    new_energy = getEnergyGJ(mean_,sigma_,new_scale,new_offset);
    break;
  case MGAUSS:
    new_energy = getEnergyGJE(mean_,sigma_,new_scale,new_offset);
    break;
  case OUTLIERS:
    new_energy = getEnergySP(mean_,sigma_,new_scale,new_offset);
    break;
  case MOUTLIERS:
    new_energy = getEnergySPE(mean_,sigma_,new_scale,new_offset);
    break;
  case GENERIC:
    new_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,new_scale,new_offset);
    break;
  }

  // for the scale/offset we need to consider the total energy
  // (old and new energies are summed over the replicas; non-master ranks
  // contribute zero so the inter-replica sum is not double counted)
  std::vector<double> totenergies(2);
  if(master) {
    totenergies[0] = old_energy;
    totenergies[1] = new_energy;
    if(nrep_>1) {
      multi_sim_comm.Sum(totenergies);
    }
  } else {
    totenergies[0] = 0;
    totenergies[1] = 0;
  }
  comm.Sum(totenergies);

  // accept or reject
  const double delta = ( totenergies[1] - totenergies[0] ) / kbt_;
  // if delta is negative always accept move
  if( delta <= 0.0 ) {
    old_energy = new_energy;
    scale_ = new_scale;
    offset_ = new_offset;
    MCacceptScale_++;
    // otherwise extract random number
  } else {
    double s = random[1].RandU01();
    if( s < std::exp(-delta) ) {
      old_energy = new_energy;
      scale_ = new_scale;
      offset_ = new_offset;
      MCacceptScale_++;
    }
  }
}
1348 :
// Monte-Carlo move for the noise parameters sigma.
// mean_      : current averaged calculated data
// old_energy : in/out, current metainference energy, updated on acceptance
// i          : index of the current MC sub-step, selects which chunk of sigmas to move
// indices    : pre-shuffled sigma indices (used only when MCCHUNKSIZE>0)
// breaknow   : out, set to true when no sigma is left to move so the caller can stop
void Metainference::moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow) {
  std::vector<double> new_sigma(sigma_.size());
  new_sigma = sigma_;

  // change MCchunksize_ sigmas
  if (MCchunksize_ > 0) {
    if ((MCchunksize_ * i) >= sigma_.size()) {
      // This means we are not moving any sigma, so we should break immediately
      breaknow = true;
    }

    // change random sigmas
    for(unsigned j=0; j<MCchunksize_; j++) {
      const unsigned shuffle_index = j + MCchunksize_ * i;
      if (shuffle_index >= sigma_.size()) {
        // Going any further will segfault but we should still evaluate the sigmas we changed
        break;
      }
      const unsigned index = indices[shuffle_index];
      const double r2 = random[0].Gaussian();
      const double ds2 = Dsigma_[index]*r2;
      new_sigma[index] = sigma_[index] + ds2;
      // check boundaries (reflect the move at sigma_min_/sigma_max_)
      if(new_sigma[index] > sigma_max_[index]) {
        new_sigma[index] = 2.0 * sigma_max_[index] - new_sigma[index];
      }
      if(new_sigma[index] < sigma_min_[index]) {
        new_sigma[index] = 2.0 * sigma_min_[index] - new_sigma[index];
      }
    }
  } else {
    // change all sigmas
    for(unsigned j=0; j<sigma_.size(); j++) {
      const double r2 = random[0].Gaussian();
      const double ds2 = Dsigma_[j]*r2;
      new_sigma[j] = sigma_[j] + ds2;
      // check boundaries (reflect the move at sigma_min_/sigma_max_)
      if(new_sigma[j] > sigma_max_[j]) {
        new_sigma[j] = 2.0 * sigma_max_[j] - new_sigma[j];
      }
      if(new_sigma[j] < sigma_min_[j]) {
        new_sigma[j] = 2.0 * sigma_min_[j] - new_sigma[j];
      }
    }
  }

  if (breaknow) {
    // We did not move any sigmas, so no sense in evaluating anything
    return;
  }

  // calculate new energy
  double new_energy = 0.;
  switch(noise_type_) {
  case GAUSS:
    new_energy = getEnergyGJ(mean_,new_sigma,scale_,offset_);
    break;
  case MGAUSS:
    new_energy = getEnergyGJE(mean_,new_sigma,scale_,offset_);
    break;
  case OUTLIERS:
    new_energy = getEnergySP(mean_,new_sigma,scale_,offset_);
    break;
  case MOUTLIERS:
    new_energy = getEnergySPE(mean_,new_sigma,scale_,offset_);
    break;
  case GENERIC:
    new_energy = getEnergyMIGEN(mean_,ftilde_,new_sigma,scale_,offset_);
    break;
  }

  // accept or reject
  const double delta = ( new_energy - old_energy ) / kbt_;
  // if delta is negative always accept move
  if( delta <= 0.0 ) {
    old_energy = new_energy;
    sigma_ = new_sigma;
    MCaccept_++;
    // otherwise extract random number
  } else {
    const double s = random[0].RandU01();
    if( s < std::exp(-delta) ) {
      old_energy = new_energy;
      sigma_ = new_sigma;
      MCaccept_++;
    }
  }
}
1437 :
// One full Monte-Carlo pass over the metainference parameters
// (ftilde for GENERIC, scale/offset if sampled, sigmas), followed by the
// inter-replica sum of the resulting energy.
// mean_ : current averaged calculated data.
// Returns the total metainference energy (summed over replicas) for the
// current coordinates and updated parameters.
double Metainference::doMonteCarlo(const std::vector<double> &mean_) {
  // calculate old energy with the updated coordinates
  double old_energy=0.;

  switch(noise_type_) {
  case GAUSS:
    old_energy = getEnergyGJ(mean_,sigma_,scale_,offset_);
    break;
  case MGAUSS:
    old_energy = getEnergyGJE(mean_,sigma_,scale_,offset_);
    break;
  case OUTLIERS:
    old_energy = getEnergySP(mean_,sigma_,scale_,offset_);
    break;
  case MOUTLIERS:
    old_energy = getEnergySPE(mean_,sigma_,scale_,offset_);
    break;
  case GENERIC:
    old_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,scale_,offset_);
    break;
  }

  // do not run MC if this is a replica-exchange trial
  if(!getExchangeStep()) {

    // Create std::vector of random sigma indices (replica-specific RNG stream)
    std::vector<unsigned> indices;
    if (MCchunksize_ > 0) {
      for (unsigned j=0; j<sigma_.size(); j++) {
        indices.push_back(j);
      }
      random[2].Shuffle(indices);
    }
    bool breaknow = false;

    // cycle on MC steps
    for(unsigned i=0; i<MCsteps_; ++i) {
      MCtrial_++;
      // propose move for ftilde
      if(noise_type_==GENERIC) {
        moveTilde(mean_, old_energy);
      }
      // propose move for scale and/or offset
      if(doscale_||dooffset_) {
        moveScaleOffset(mean_, old_energy);
      }
      // propose move for sigma
      moveSigmas(mean_, old_energy, i, indices, breaknow);
      // exit from the loop if this is the case
      if(breaknow) {
        break;
      }
    }

    /* save the result of the sampling */
    /* ftilde */
    if(noise_type_==GENERIC) {
      // acceptance ratio of the ftilde moves so far
      double accept = static_cast<double>(MCacceptFT_) / static_cast<double>(MCtrial_);
      valueAcceptFT->set(accept);
      for(unsigned i=0; i<sigma_.size(); i++) {
        valueFtilde[i]->set(ftilde_[i]);
      }
    }
    /* scale and offset */
    if(doscale_ || doregres_zero_) {
      valueScale->set(scale_);
    }
    if(dooffset_) {
      valueOffset->set(offset_);
    }
    if(doscale_||dooffset_) {
      // acceptance ratio of the scale/offset moves so far
      double accept = static_cast<double>(MCacceptScale_) / static_cast<double>(MCtrial_);
      valueAcceptScale->set(accept);
    }
    /* sigmas */
    for(unsigned i=0; i<sigma_.size(); i++) {
      valueSigma[i]->set(sigma_[i]);
    }
    // acceptance ratio of the sigma moves so far
    double accept = static_cast<double>(MCaccept_) / static_cast<double>(MCtrial_);
    valueAccept->set(accept);
  }

  // here we sum the score over the replicas to get the full metainference score that we save as a bias
  // (non-master ranks zero their copy so the inter-replica sum is not double counted)
  if(master) {
    if(nrep_>1) {
      multi_sim_comm.Sum(old_energy);
    }
  } else {
    old_energy=0;
  }
  comm.Sum(old_energy);

  // this is the energy with current coordinates and parameters
  return old_energy;
}
1533 :
/*
  In the following energy-force functions we do not add the normalisation and the Jeffreys priors,
  because they are not needed for the forces; the correct Metainference energy is the one calculated
  in the Monte-Carlo routine.
*/
1539 :
1540 108 : void Metainference::getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1541 : const std::vector<double> &dmean_b) {
1542 108 : const double scale2 = scale_*scale_;
1543 108 : const double sm2 = sigma_mean2_[0];
1544 108 : const double ss2 = sigma_[0]*sigma_[0] + scale2*sm2;
1545 108 : std::vector<double> f(narg,0);
1546 :
1547 108 : if(master) {
1548 84 : #pragma omp parallel num_threads(OpenMP::getNumThreads())
1549 : {
1550 : #pragma omp for
1551 : for(unsigned i=0; i<narg; ++i) {
1552 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1553 : const double a2 = 0.5*dev*dev + ss2;
1554 : if(sm2 > 0.0) {
1555 : const double t = std::exp(-a2/sm2);
1556 : const double dt = 1./t;
1557 : const double dit = 1./(1.-dt);
1558 : f[i] = -scale_*dev*(dit/sm2 + 1./a2);
1559 : } else {
1560 : f[i] = -scale_*dev*(1./a2);
1561 : }
1562 : }
1563 : }
1564 : // collect contribution to forces and energy from other replicas
1565 84 : if(nrep_>1) {
1566 24 : multi_sim_comm.Sum(&f[0],narg);
1567 : }
1568 : }
1569 : // intra-replica summation
1570 108 : comm.Sum(&f[0],narg);
1571 :
1572 : double w_tmp = 0.;
1573 720 : for(unsigned i=0; i<narg; ++i) {
1574 612 : setOutputForce(i, kbt_*f[i]*dmean_x[i]);
1575 612 : w_tmp += kbt_*f[i]*dmean_b[i];
1576 : }
1577 :
1578 108 : if(do_reweight_) {
1579 48 : setOutputForce(narg, w_tmp);
1580 96 : getPntrToComponent("biasDer")->set(-w_tmp);
1581 : }
1582 108 : }
1583 :
// Forces for NOISETYPE=MOUTLIERS: derivative of the per-point long-tailed
// gaussian energy with respect to the arguments, plus the bias derivative used
// for reweighting. dmean_x/dmean_b are the derivatives of the averaged data
// with respect to the arguments and to the weight, respectively.
void Metainference::getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
                                      const std::vector<double> &dmean_b) {
  const double scale2 = scale_*scale_;
  std::vector<double> f(narg,0);

  if(master) {
    #pragma omp parallel num_threads(OpenMP::getNumThreads())
    {
      #pragma omp for
      for(unsigned i=0; i<narg; ++i) {
        // per-point effective variance and deviation
        const double sm2 = sigma_mean2_[i];
        const double ss2 = sigma_[i]*sigma_[i] + scale2*sm2;
        const double dev = scale_*mean[i]-parameters[i]+offset_;
        const double a2 = 0.5*dev*dev + ss2;
        if(sm2 > 0.0) {
          const double t = std::exp(-a2/sm2);
          const double dt = 1./t;
          const double dit = 1./(1.-dt);
          f[i] = -scale_*dev*(dit/sm2 + 1./a2);
        } else {
          f[i] = -scale_*dev*(1./a2);
        }
      }
    }
    // collect contribution to forces and energy from other replicas
    if(nrep_>1) {
      multi_sim_comm.Sum(&f[0],narg);
    }
  }
  // intra-replica summation
  comm.Sum(&f[0],narg);

  double w_tmp = 0.;
  for(unsigned i=0; i<narg; ++i) {
    setOutputForce(i, kbt_ * dmean_x[i] * f[i]);
    w_tmp += kbt_ * dmean_b[i] *f[i];
  }

  if(do_reweight_) {
    setOutputForce(narg, w_tmp);
    getPntrToComponent("biasDer")->set(-w_tmp);
  }
}
1626 :
1627 36 : void Metainference::getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1628 : const std::vector<double> &dmean_b) {
1629 36 : const double scale2 = scale_*scale_;
1630 36 : double inv_s2=0.;
1631 :
1632 36 : if(master) {
1633 36 : inv_s2 = 1./(sigma_[0]*sigma_[0] + scale2*sigma_mean2_[0]);
1634 36 : if(nrep_>1) {
1635 0 : multi_sim_comm.Sum(inv_s2);
1636 : }
1637 : }
1638 36 : comm.Sum(inv_s2);
1639 :
1640 : double w_tmp = 0.;
1641 36 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1642 : {
1643 : #pragma omp for reduction( + : w_tmp)
1644 : for(unsigned i=0; i<narg; ++i) {
1645 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1646 : const double mult = dev*scale_*inv_s2;
1647 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1648 : w_tmp += kbt_*dmean_b[i]*mult;
1649 : }
1650 : }
1651 :
1652 36 : if(do_reweight_) {
1653 0 : setOutputForce(narg, -w_tmp);
1654 0 : getPntrToComponent("biasDer")->set(w_tmp);
1655 : }
1656 36 : }
1657 :
1658 52 : void Metainference::getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1659 : const std::vector<double> &dmean_b) {
1660 52 : const double scale2 = scale_*scale_;
1661 52 : std::vector<double> inv_s2(sigma_.size(),0.);
1662 :
1663 52 : if(master) {
1664 1300 : for(unsigned i=0; i<sigma_.size(); ++i) {
1665 1274 : inv_s2[i] = 1./(sigma_[i]*sigma_[i] + scale2*sigma_mean2_[i]);
1666 : }
1667 26 : if(nrep_>1) {
1668 26 : multi_sim_comm.Sum(&inv_s2[0],sigma_.size());
1669 : }
1670 : }
1671 52 : comm.Sum(&inv_s2[0],sigma_.size());
1672 :
1673 : double w_tmp = 0.;
1674 52 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1675 : {
1676 : #pragma omp for reduction( + : w_tmp)
1677 : for(unsigned i=0; i<narg; ++i) {
1678 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1679 : const double mult = dev*scale_*inv_s2[i];
1680 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1681 : w_tmp += kbt_*dmean_b[i]*mult;
1682 : }
1683 : }
1684 :
1685 52 : if(do_reweight_) {
1686 52 : setOutputForce(narg, -w_tmp);
1687 104 : getPntrToComponent("biasDer")->set(w_tmp);
1688 : }
1689 52 : }
1690 :
/// Apply bias forces for the GENERIC (MIGEN) noise model, where the forward
/// model estimate ftilde_ is itself a sampled variable.
/// \param mean     replica-averaged value of each argument
/// \param dmean_x  derivative of the mean w.r.t. the local argument
/// \param dmean_b  derivative of the mean w.r.t. the reweighting bias
void Metainference::getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b) {
  std::vector<double> inv_s2(sigma_.size(),0.);
  std::vector<double> dev(sigma_.size(),0.);
  std::vector<double> dev2(sigma_.size(),0.);

  // deviations mean-ftilde are computed on the masters only; non-master
  // ranks contribute zeros and pick the values up via comm.Sum below
  for(unsigned i=0; i<sigma_.size(); ++i) {
    inv_s2[i] = 1./sigma_mean2_[i];
    if(master) {
      dev[i] = (mean[i]-ftilde_[i]);
      dev2[i] = dev[i]*dev[i];
    }
  }
  // reduce over replicas first (masters only), then over intra-replica ranks
  if(master&&nrep_>1) {
    multi_sim_comm.Sum(&dev[0],dev.size());
    multi_sim_comm.Sum(&dev2[0],dev2.size());
  }
  comm.Sum(&dev[0],dev.size());
  comm.Sum(&dev2[0],dev2.size());
  // NOTE(review): dev2 is computed and reduced but never read in this
  // function — looks like a leftover; confirm before removing.

  // force on each argument; dene_b accumulates the force on the bias argument
  double dene_b = 0.;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(dene_b)
  {
    #pragma omp for reduction( + : dene_b)
    for(unsigned i=0; i<narg; ++i) {
      const double dene_x = kbt_*inv_s2[i]*dmean_x[i]*dev[i];
      dene_b += kbt_*inv_s2[i]*dmean_b[i]*dev[i];
      setOutputForce(i, -dene_x);
    }
  }

  // when reweighting, the last argument is the bias itself
  if(do_reweight_) {
    setOutputForce(narg, -dene_b);
    getPntrToComponent("biasDer")->set(dene_b);
  }
}
1726 :
/// Compute the replica weights used for the ensemble average.
/// With reweighting, each replica's weight is derived from an exponentially
/// averaged bias; otherwise uniform weights are used.
/// \param iselect  selector index (for REM-like multi-ensemble setups)
/// \param weight   [out] this replica's (unnormalized) weight
/// \param norm     [out] sum of all replica weights (caller must pass 0)
/// \param neff     [out] effective number of replicas (Kish sample size)
void Metainference::get_weights(const unsigned iselect, double &weight, double &norm, double &neff) {
  const double dnrep = static_cast<double>(nrep_);
  // calculate the weights either from BIAS
  if(do_reweight_) {
    // gather the instantaneous bias of every replica; only the master ranks
    // talk across replicas, the others receive via comm.Sum below
    std::vector<double> bias(nrep_,0);
    if(master) {
      bias[replica_] = getArgument(narg);
      if(nrep_>1) {
        multi_sim_comm.Sum(&bias[0], nrep_);
      }
    }
    comm.Sum(&bias[0], nrep_);

    // accumulate weights: exponential moving average of the bias with a
    // decay time of average_weights_stride_ steps; first call seeds the
    // average with the instantaneous values
    const double decay = 1./static_cast<double> (average_weights_stride_);
    if(!firstTimeW[iselect]) {
      for(unsigned i=0; i<nrep_; ++i) {
        const double delta=bias[i]-average_weights_[iselect][i];
        average_weights_[iselect][i]+=decay*delta;
      }
    } else {
      firstTimeW[iselect] = false;
      for(unsigned i=0; i<nrep_; ++i) {
        average_weights_[iselect][i] = bias[i];
      }
    }

    // set average back into bias and set norm to one
    // (subtracting the max bias keeps the exponentials from overflowing)
    const double maxbias = *(std::max_element(average_weights_[iselect].begin(), average_weights_[iselect].end()));
    for(unsigned i=0; i<nrep_; ++i) {
      bias[i] = std::exp((average_weights_[iselect][i]-maxbias)/kbt_);
    }
    // set local weight, norm and weight variance
    weight = bias[replica_];
    double w2=0.;
    for(unsigned i=0; i<nrep_; ++i) {
      w2 += bias[i]*bias[i];
      norm += bias[i];
    }
    // effective sample size: (sum w)^2 / sum w^2
    neff = norm*norm/w2;
    getPntrToComponent("weight")->set(weight/norm);
  } else {
    // or arithmetic ones: every replica counts equally
    neff = dnrep;
    weight = 1.0;
    norm = dnrep;
  }
  getPntrToComponent("neff")->set(neff);
}
1776 :
1777 280 : void Metainference::get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean) {
1778 280 : const double dnrep = static_cast<double>(nrep_);
1779 280 : std::vector<double> sigma_mean2_tmp(sigma_mean2_.size(), 0.);
1780 :
1781 280 : if(do_optsigmamean_>0) {
1782 : // remove first entry of the history std::vector
1783 0 : if(sigma_mean2_last_[iselect][0].size()==optsigmamean_stride_&&optsigmamean_stride_>0)
1784 0 : for(unsigned i=0; i<narg; ++i) {
1785 0 : sigma_mean2_last_[iselect][i].erase(sigma_mean2_last_[iselect][i].begin());
1786 : }
1787 : /* this is the current estimate of sigma mean for each argument
1788 : there is one of this per argument in any case because it is
1789 : the maximum among these to be used in case of GAUSS/OUTLIER */
1790 0 : std::vector<double> sigma_mean2_now(narg,0);
1791 0 : if(master) {
1792 0 : for(unsigned i=0; i<narg; ++i) {
1793 0 : sigma_mean2_now[i] = weight*(getArgument(i)-mean[i])*(getArgument(i)-mean[i]);
1794 : }
1795 0 : if(nrep_>1) {
1796 0 : multi_sim_comm.Sum(&sigma_mean2_now[0], narg);
1797 : }
1798 : }
1799 0 : comm.Sum(&sigma_mean2_now[0], narg);
1800 0 : for(unsigned i=0; i<narg; ++i) {
1801 0 : sigma_mean2_now[i] *= 1.0/(neff-1.)/norm;
1802 : }
1803 :
1804 : // add sigma_mean2 to history
1805 0 : if(optsigmamean_stride_>0) {
1806 0 : for(unsigned i=0; i<narg; ++i) {
1807 0 : sigma_mean2_last_[iselect][i].push_back(sigma_mean2_now[i]);
1808 : }
1809 : } else {
1810 0 : for(unsigned i=0; i<narg; ++i)
1811 0 : if(sigma_mean2_now[i] > sigma_mean2_last_[iselect][i][0]) {
1812 0 : sigma_mean2_last_[iselect][i][0] = sigma_mean2_now[i];
1813 : }
1814 : }
1815 :
1816 0 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1817 0 : for(unsigned i=0; i<narg; ++i) {
1818 : /* set to the maximum in history std::vector */
1819 0 : sigma_mean2_tmp[i] = *max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end());
1820 : /* the standard error of the mean */
1821 0 : valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
1822 0 : if(noise_type_==GENERIC) {
1823 0 : sigma_min_[i] = std::sqrt(sigma_mean2_tmp[i]);
1824 0 : if(sigma_[i] < sigma_min_[i]) {
1825 0 : sigma_[i] = sigma_min_[i];
1826 : }
1827 : }
1828 : }
1829 0 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1830 : // find maximum for each data point
1831 : std::vector <double> max_values;
1832 0 : for(unsigned i=0; i<narg; ++i) {
1833 0 : max_values.push_back(*max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end()));
1834 : }
1835 : // find maximum across data points
1836 0 : const double max_now = *max_element(max_values.begin(), max_values.end());
1837 : // set new value
1838 0 : sigma_mean2_tmp[0] = max_now;
1839 0 : valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
1840 : }
1841 : // endif sigma mean optimization
1842 : // start sigma max optimization
1843 0 : if(do_optsigmamean_>1&&!sigmamax_opt_done_) {
1844 0 : for(unsigned i=0; i<sigma_max_.size(); i++) {
1845 0 : if(sigma_max_est_[i]<sigma_mean2_tmp[i]&&optimized_step_>optsigmamean_stride_) {
1846 0 : sigma_max_est_[i]=sigma_mean2_tmp[i];
1847 : }
1848 : // ready to set once and for all the value of sigma_max
1849 0 : if(optimized_step_==N_optimized_step_) {
1850 0 : sigmamax_opt_done_=true;
1851 0 : for(unsigned i=0; i<sigma_max_.size(); i++) {
1852 0 : sigma_max_[i]=std::sqrt(sigma_max_est_[i]*dnrep);
1853 0 : Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
1854 0 : if(sigma_[i]>sigma_max_[i]) {
1855 0 : sigma_[i]=sigma_max_[i];
1856 : }
1857 : }
1858 : }
1859 : }
1860 0 : optimized_step_++;
1861 : }
1862 : // end sigma max optimization
1863 : } else {
1864 280 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1865 2948 : for(unsigned i=0; i<narg; ++i) {
1866 2812 : sigma_mean2_tmp[i] = sigma_mean2_last_[iselect][i][0];
1867 2812 : valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
1868 : }
1869 144 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1870 144 : sigma_mean2_tmp[0] = sigma_mean2_last_[iselect][0][0];
1871 144 : valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
1872 : }
1873 : }
1874 :
1875 280 : sigma_mean2_ = sigma_mean2_tmp;
1876 280 : }
1877 :
1878 280 : void Metainference::replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b) {
1879 280 : if(master) {
1880 2236 : for(unsigned i=0; i<narg; ++i) {
1881 2030 : mean[i] = weight/norm*getArgument(i);
1882 : }
1883 206 : if(nrep_>1) {
1884 74 : multi_sim_comm.Sum(&mean[0], narg);
1885 : }
1886 : }
1887 280 : comm.Sum(&mean[0], narg);
1888 : // set the derivative of the mean with respect to the bias
1889 3776 : for(unsigned i=0; i<narg; ++i) {
1890 3496 : dmean_b[i] = weight/norm/kbt_*(getArgument(i)-mean[i])/static_cast<double>(average_weights_stride_);
1891 : }
1892 :
1893 : // this is only for generic metainference
1894 280 : if(firstTime) {
1895 28 : ftilde_ = mean;
1896 28 : firstTime = false;
1897 : }
1898 280 : }
1899 :
1900 6 : void Metainference::do_regression_zero(const std::vector<double> &mean) {
1901 : // parameters[i] = scale_ * mean[i]: find scale_ with linear regression
1902 : double num = 0.0;
1903 : double den = 0.0;
1904 48 : for(unsigned i=0; i<parameters.size(); ++i) {
1905 42 : num += mean[i] * parameters[i];
1906 42 : den += mean[i] * mean[i];
1907 : }
1908 6 : if(den>0) {
1909 6 : scale_ = num / den;
1910 : } else {
1911 0 : scale_ = 1.0;
1912 : }
1913 6 : }
1914 :
/// Main entry point called every MD step: builds the replica-averaged data,
/// updates the noise parameters by Monte Carlo, and sets the Metainference
/// bias energy and the forces on all arguments.
void Metainference::calculate() {
  // get step
  const long long int step = getStep();

  unsigned iselect = 0;
  // set the value of selector for REM-like stuff
  if(selector_.length()>0) {
    iselect = static_cast<unsigned>(plumed.passMap[selector_]);
  }

  /* 1) collect weights */
  double weight = 0.;
  double neff = 0.;
  double norm = 0.;
  get_weights(iselect, weight, norm, neff);

  /* 2) calculate average */
  std::vector<double> mean(narg,0);
  // this is the derivative of the mean with respect to the argument
  // (constant weight/norm for every argument)
  std::vector<double> dmean_x(narg,weight/norm);
  // this is the derivative of the mean with respect to the bias
  std::vector<double> dmean_b(narg,0);
  // calculate it
  replica_averaging(weight, norm, mean, dmean_b);

  /* 3) calculates parameters */
  get_sigma_mean(iselect, weight, norm, neff, mean);

  // in case of regression with zero intercept, calculate scale
  if(doregres_zero_ && step%nregres_zero_==0) {
    do_regression_zero(mean);
  }

  /* 4) run monte carlo */
  // samples sigma (and scale/offset/ftilde when enabled) and returns the
  // Metainference energy for the current state
  double ene = doMonteCarlo(mean);

  // calculate bias and forces, dispatching on the configured noise model
  switch(noise_type_) {
  case GAUSS:
    getEnergyForceGJ(mean, dmean_x, dmean_b);
    break;
  case MGAUSS:
    getEnergyForceGJE(mean, dmean_x, dmean_b);
    break;
  case OUTLIERS:
    getEnergyForceSP(mean, dmean_x, dmean_b);
    break;
  case MOUTLIERS:
    getEnergyForceSPE(mean, dmean_x, dmean_b);
    break;
  case GENERIC:
    getEnergyForceMIGEN(mean, dmean_x, dmean_b);
    break;
  }

  setBias(ene);
}
1972 :
/// Write the restart/status file: sigma-mean history maxima, current sigma
/// and sigma_max values, ftilde (GENERIC only), scale/offset and the
/// per-selector averaged weights. Field names and order define the on-disk
/// restart format and must not change.
void Metainference::writeStatus() {
  // rewind so the file always contains a single, current snapshot
  sfile_.rewind();
  sfile_.printField("time",getTimeStep()*getStep());
  //nsel
  for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
    std::string msg_i,msg_j;
    Tools::convert(i,msg_i);
    std::vector <double> max_values;
    //narg
    for(unsigned j=0; j<narg; ++j) {
      Tools::convert(j,msg_j);
      std::string msg = msg_i+"_"+msg_j;
      if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
        // per-datapoint noise: store the maximum of the history window
        sfile_.printField("sigmaMean_"+msg,std::sqrt(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end())));
      } else {
        // find maximum for each data point
        max_values.push_back(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end()));
      }
    }
    if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
      // find maximum across data points (single shared sigma mean)
      const double max_now = std::sqrt(*max_element(max_values.begin(), max_values.end()));
      Tools::convert(0,msg_j);
      std::string msg = msg_i+"_"+msg_j;
      sfile_.printField("sigmaMean_"+msg, max_now);
    }
  }
  // current sigma for every noise level
  for(unsigned i=0; i<sigma_.size(); ++i) {
    std::string msg;
    Tools::convert(i,msg);
    sfile_.printField("sigma_"+msg,sigma_[i]);
  }
  // current sigma_max (possibly calibrated on the fly)
  for(unsigned i=0; i<sigma_max_.size(); ++i) {
    std::string msg;
    Tools::convert(i,msg);
    sfile_.printField("sigma_max_"+msg,sigma_max_[i]);
  }
  // sampled forward-model estimates, only meaningful for GENERIC noise
  if(noise_type_==GENERIC) {
    for(unsigned i=0; i<ftilde_.size(); ++i) {
      std::string msg;
      Tools::convert(i,msg);
      sfile_.printField("ftilde_"+msg,ftilde_[i]);
    }
  }
  sfile_.printField("scale0_",scale_);
  sfile_.printField("offset0_",offset_);
  // this replica's averaged weight for each selector
  for(unsigned i=0; i<average_weights_.size(); i++) {
    std::string msg_i;
    Tools::convert(i,msg_i);
    sfile_.printField("weight_"+msg_i,average_weights_[i][replica_]);
  }
  // terminate the record and force it to disk
  sfile_.printField();
  sfile_.flush();
}
2027 :
2028 280 : void Metainference::update() {
2029 : // write status file
2030 280 : if(write_stride_>0&& (getStep()%write_stride_==0 || getCPT()) ) {
2031 32 : writeStatus();
2032 : }
2033 280 : }
2034 :
2035 : }
2036 : }
2037 :
|