All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Analysis.cpp
Go to the documentation of this file.
1 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2  Copyright (c) 2013 The plumed team
3  (see the PEOPLE file at the root of the distribution for a list of names)
4 
5  See http://www.plumed-code.org for more information.
6 
7  This file is part of plumed, version 2.0.
8 
9  plumed is free software: you can redistribute it and/or modify
10  it under the terms of the GNU Lesser General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  plumed is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public License
20  along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 
23 #include "Analysis.h"
24 #include "core/ActionSet.h"
25 #include "core/ActionWithValue.h"
26 #include "core/PlumedMain.h"
27 #include "core/Atoms.h"
28 #include "tools/IFile.h"
29 
30 namespace PLMD {
31 namespace analysis {
32 
33 //+PLUMEDOC INTERNAL reweighting
34 /*
35 Calculate free energies from a biassed/higher temperature trajectory.
36 
37 We can use our knowledge of the Boltzmann distribution in the cannonical ensemble to reweight the data
38 contained in trajectories. Using this procedure we can take trajectory at temperature \f$T_1\f$ and use it to
39 extract probabilities at a different temperature, \f$T_2\f$, using:
40 
41 \f[
42 P(s',t) = \frac{ \sum_{t'}^t \delta( s(x) - s' ) \exp\left( +( \left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) }{ \sum_t'^t \exp\left( +\left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) }
43 \f]
44 
45 where \f$U(x,t')\f$ is the potential energy of the system. Alternatively, if a static or pseudo-static bias \f$V(x,t')\f$ is acting on
46 the system we can remove this bias and get the unbiased probability distribution using:
47 
48 \f[
49 P(s',t) = \frac{ \sum_{t'}^t \delta( s(x) - s' ) \exp\left( +\frac{V(x,t')}{k_B T} \right) }{ \sum_t'^t \exp\left( +\frac{V(x,t')}{k_B T} \right) }
50 \f]
51 
52 */
53 //+ENDPLUMEDOC
54 
59  keys.use("ARG");
60  keys.add("compulsory","STRIDE","1","the frequency with which data should be stored for analysis");
61  keys.addFlag("USE_ALL_DATA",false,"use the data from the entire trajectory to perform the analysis");
62  keys.add("compulsory","RUN","the frequency with which to run the analysis algorithm. This is not required if you specify USE_ALL_DATA");
63  keys.add("optional","FMT","the format that should be used in analysis output files");
64  keys.addFlag("REWEIGHT_BIAS",false,"reweight the data using all the biases acting on the dynamics. For more information see \\ref reweighting.");
65  keys.add("optional","TEMP","the system temperature. This is required if you are reweighting.");
66  keys.add("optional","REWEIGHT_TEMP","reweight data from a trajectory at one temperature and output the probability "
67  "distribution at a second temperature. For more information see \\ref reweighting. "
68  "This is not possible during postprocessing.");
69  keys.addFlag("WRITE_CHECKPOINT",false,"write out a checkpoint so that the analysis can be restarted in a later run");
70  keys.add("hidden","REUSE_DATA_FROM","eventually this will allow you to analyse the same set of data multiple times");
71  keys.add("hidden","IGNORE_REWEIGHTING","this allows you to ignore any reweighting factors");
72  keys.reserveFlag("NOMEMORY",false,"analyse each block of data separately");
73 }
74 
76 Action(ao),
77 ActionPilot(ao),
79 single_run(false),
80 nomemory(true),
81 write_chq(false),
82 reusing_data(false),
83 ignore_reweight(false),
84 needeng(false),
85 idata(0),
86 firstAnalysisDone(false),
87 old_norm(0.0),
88 ofmt("%f")
89 {
90  parse("FMT",ofmt); // Read the format for output files
91  std::string prev_analysis; parse("REUSE_DATA_FROM",prev_analysis);
92  if( prev_analysis.length()>0 ){
93  reusing_data=true;
94  mydatastash=plumed.getActionSet().selectWithLabel<Analysis*>( prev_analysis );
95  if( !mydatastash ) error("could not find analysis action named " + prev_analysis );
96  parseFlag("IGNORE_REWEIGHTING",ignore_reweight);
97  if( ignore_reweight ) log.printf(" reusing data stored by %s but ignoring all reweighting\n",prev_analysis.c_str() );
98  else log.printf(" reusing data stored by %s\n",prev_analysis.c_str() );
99  } else {
100  if( keywords.exists("REWEIGHT_BIAS") ){
101  bool dobias; parseFlag("REWEIGHT_BIAS",dobias);
102  if( dobias ){
103  std::vector<ActionWithValue*> all=plumed.getActionSet().select<ActionWithValue*>();
104  if( all.empty() ) error("your input file is not telling plumed to calculate anything");
105  std::vector<Value*> arg( getArguments() );
106  log.printf(" reweigting using the following biases ");
107  for(unsigned j=0;j<all.size();j++){
108  std::string flab; flab=all[j]->getLabel() + ".bias";
109  if( all[j]->exists(flab) ){
110  biases.push_back( all[j]->copyOutput(flab) );
111  arg.push_back( all[j]->copyOutput(flab) );
112  log.printf(" %s",flab.c_str());
113  }
114  }
115  log.printf("\n");
116  if( biases.empty() ) error("you are asking to reweight bias but there does not appear to be a bias acting on your system");
117  requestArguments( arg );
118  }
119  }
120 
121  simtemp=0.; parse("TEMP",simtemp);
122  if( simtemp==0 && !biases.empty() ) error("to reweight you must specify a temperature use TEMP");
123  rtemp=0;
124  if( keywords.exists("REWEIGHT_TEMP") ) parse("REWEIGHT_TEMP",rtemp);
125  if( rtemp!=0 ){
126  if( simtemp==0 ) error("to reweight you must specify a temperature use TEMP");
127  needeng=true;
128  log.printf(" reweighting simulation at %f to probabilities at temperature %f\n",simtemp,rtemp);
129  }
130 
131  parseFlag("USE_ALL_DATA",single_run);
132  if( !single_run ){
133  parse("RUN",freq );
134  log.printf(" running analysis every %u steps\n",freq);
135  if( freq%getStride()!= 0 ) error("Frequncy of running is not a multiple of the stride");
136  ndata=freq/getStride();
137  data.resize( ndata );
138  for(unsigned i=0;i<ndata;++i){ data[i].resize( getNumberOfArguments() ); }
139  logweights.resize( ndata );
140  weights.resize( ndata );
141  } else {
142  log.printf(" analyzing all data in trajectory\n");
143  args.resize( getNumberOfArguments() );
144  }
145  if( keywords.exists("NOMEMORY") ){ nomemory=false; parseFlag("NOMEMORY",nomemory); }
146  if(nomemory) log.printf(" doing a separate analysis for each block of data\n");
147  parseFlag("WRITE_CHECKPOINT",write_chq);
148  if( write_chq && single_run ){
149  write_chq=false;
150  warning("ignoring WRITE_CHECKPOINT flag because we are analyzing all data");
151  }
152 
153  // We need no restart file if we are just collecting data and analyzing all of it
154  std::string filename = getName() + "_" + getLabel() + ".chkpnt";
155  if( write_chq ) rfile.link(*this);
156  if( plumed.getRestart() ){
157  if( single_run ) error("cannot restart histogram when using the USE_ALL_DATA option");
158  if( !write_chq ) warning("restarting without writing a checkpoint file is somewhat strange");
159  // Read in data from input file
160  readDataFromFile( filename );
161  // Setup the restart file (append mode)
162  if( write_chq ) rfile.open( filename.c_str() ); // In append mode automatically because of restart
163  // Run the analysis if we stoped in the middle of it last time
164  log.printf(" restarting analysis with %u points read from restart file\n",idata);
165  } else if( write_chq ){
166  // Setup the restart file (delete any old one)
167  rfile.open( filename.c_str() ); // In overwrite mode automatically because there is no restart
168  }
169  if( write_chq ){
170  rfile.addConstantField("old_normalization");
171  for(unsigned i=0;i<getNumberOfArguments();++i) rfile.setupPrintValue( getPntrToArgument(i) );
172  }
173  }
174 }
175 
176 void Analysis::readDataFromFile( const std::string& filename ){
177  double tstep, oldtstep; IFile ifile;
178  if( !ifile.FileExist(filename) ) error("failed to find required restart file " + filename );
179  ifile.open(filename);
180 
181  bool first=true;
182  while(ifile.scanField("time",tstep)){
183  if( !first && ((tstep-oldtstep) - getStride()*plumed.getAtoms().getTimeStep())>plumed.getAtoms().getTimeStep() ){
184  error("frequency of data storage in " + filename + " is not equal to frequency of data storage plumed.dat file");
185  }
186  for(unsigned j=0;j<getNumberOfArguments();++j){
187  ifile.scanField( getPntrToArgument(j)->getName(), data[idata][j] );
188  }
189  ifile.scanField("log_weight",logweights[idata]);
190  ifile.scanField("old_normalization",old_norm);
191  ifile.scanField();
192  idata++; first=false; oldtstep=tstep;
193  }
194  ifile.close();
195  if(old_norm>0) firstAnalysisDone=true;
196 }
197 
198 void Analysis::parseOutputFile( const std::string& key, std::string& filename ){
199  parse(key,filename);
200  if( !plumed.getRestart() ){
201  OFile ofile; ofile.link(*this);
202  ofile.setBackupString("analysis");
203  ofile.backupAllFiles(filename);
204  }
205 }
206 
208  if(needeng) plumed.getAtoms().setCollectEnergy(true);
209 }
210 
212 // do nothing
213 }
214 
216  // Don't store the first step (also don't store if we are getting data from elsewhere)
217  if( getStep()==0 || reusing_data ) return;
218  // This is used when we have a full quota of data from the first run
219  if( !single_run && idata==logweights.size() ) return;
220 
221  // Retrieve the bias
222  double bias=0.0; for(unsigned i=0;i<biases.size();++i) bias+=biases[i]->get();
223 
224  double ww=0;
225  if(needeng){
226  double energy=plumed.getAtoms().getEnergy()+bias;
227  // Reweighting because of temperature difference
228  ww=-( (1.0/rtemp) - (1.0/simtemp) )*(energy+bias) / plumed.getAtoms().getKBoltzmann();
229  }
230  // Reweighting because of biases
231  if( !biases.empty() ) ww += bias/( plumed.getAtoms().getKBoltzmann()*simtemp );
232 
233  if(single_run){
234  // Get the arguments and store them in a vector of vectors
235  for(unsigned i=0;i<getNumberOfArguments();++i) args[i]=getArgument(i);
236  data.push_back( args );
237  logweights.push_back(ww);
238  } else {
239  // Get the arguments and store them in a vector of vectors
240  for(unsigned i=0;i<getNumberOfArguments();++i) data[idata][i]=getArgument(i);
241  logweights[idata] = ww;
242  }
243  // Write data to checkpoint file
244  if( write_chq ) {
245  rfile.printField("time",getTime()); rfile.printField("old_normalization",old_norm);
246  for(unsigned i=0;i<getNumberOfArguments();++i) rfile.printField( getPntrToArgument(i), getArgument(i) );
247  rfile.printField("log_weight",logweights[idata]); rfile.printField();
248  }
249  // Increment data counter
250  idata++;
251 }
252 
254  if( write_chq ) rfile.close();
255 }
256 
257 void Analysis::finalizeWeights( const bool& ignore_weights ){
258  // Check that we have the correct ammount of data
259  if( !reusing_data && idata!=logweights.size() ) error("something has gone wrong. Am trying to run analysis but I don't have sufficient data");
260  if( weights.size()!=logweights.size() ) weights.resize( logweights.size() );
261 
262  norm=0; // Reset normalization constant
263  if( ignore_weights ){
264  for(unsigned i=0;i<logweights.size();++i){
265  weights[i]=1.0; norm+=1.0;
266  }
267  } else if( nomemory ){
268  // Find the maximum weight
269  double maxweight=logweights[0];
270  for(unsigned i=1;i<getNumberOfDataPoints();++i){
271  if(logweights[i]>maxweight) maxweight=logweights[i];
272  }
273  // Calculate normalization constant
274  for(unsigned i=0;i<logweights.size();++i){
275  norm+=exp( logweights[i]-maxweight );
276  }
277  // Calculate weights (no memory)
278  for(unsigned i=0;i<logweights.size();++i){
279  weights[i]=exp( logweights[i]-maxweight );
280  }
281  // Calculate normalized weights (with memory)
282  } else {
283  // Calculate normalization constant
284  for(unsigned i=0;i<logweights.size();++i){
285  norm+=exp( logweights[i] );
286  }
287  if( !firstAnalysisDone ) old_norm=1.0;
288  // Calculate weights (with memory)
289  for(unsigned i=0;i<logweights.size();++i){
290  weights[i] = exp( logweights[i] ) / old_norm;
291  }
292  if( !firstAnalysisDone ) old_norm=0.0;
293  }
294 }
295 
297 
298  // close the restart file so it is flushed
299  if( write_chq ) rfile.close();
300 
301  // Note : could add multiple walkers here - simply read in the data from all
302  // other walkers here if we are writing the check points.
303 
304  // Calculate the final weights from the log weights
305  if( !reusing_data ){
307  } else {
310  }
311  // And run the analysis
312  performAnalysis(); idata=0;
313  // Update total normalization constant
315 
316  // Delete the checkpoint file
317  if( write_chq ){
318  std::string filename = getName() + "_" + getLabel() + ".chkpnt";
319  // If we are running more than one calculation only reopen the restart file
320  if( !single_run ) rfile.open( filename.c_str(), "w+" );
321  }
322 }
323 
325  if( nomemory || !firstAnalysisDone ) return norm;
326  return ( 1. + norm/old_norm );
327 }
328 
330  accumulate();
331  if( !single_run ){
332  if( getStep()>0 && getStep()%freq==0 ) runAnalysis();
333  else if( idata==logweights.size() ) error("something has gone wrong. Probably a wrong initial time on restart");
334  }
335 }
336 
337 bool Analysis::getPeriodicityInformation(const unsigned& i, std::string& dmin, std::string& dmax){
338  bool isperiodic=getPntrToArgument(i)->isPeriodic();
339  if(isperiodic) getPntrToArgument(i)->getDomain(dmin,dmax);
340  return isperiodic;
341 }
342 
344  if( !single_run ) return;
345  if( getNumberOfDataPoints()==0 ) error("no data is available for analysis");
346  runAnalysis();
347 }
348 
349 }
350 }
bool reusing_data
Are we reusing data stored by another analysis action.
Definition: Analysis.h:52
void runFinalJobs()
RunFinalJobs This method is called once at the very end of the calculation.
Definition: Analysis.cpp:343
std::vector< Value * > biases
The biases we are using in reweighting and the args we store them separately.
Definition: Analysis.h:68
bool nomemory
Are we treating each block of data separately.
Definition: Analysis.h:48
void parseFlag(const std::string &key, bool &t)
Parse one keyword as boolean flag.
Definition: Action.cpp:104
bool getPeriodicityInformation(const unsigned &i, std::string &dmin, std::string &dmax)
Returns true if argument i is periodic together with the domain.
Definition: Analysis.cpp:337
Log & log
Reference to the log stream.
Definition: Action.h:93
unsigned idata
The piece of data we are inserting.
Definition: Analysis.h:70
unsigned ndata
Number of data point we need to run analysis.
Definition: Analysis.h:60
void backupAllFiles(const std::string &str)
This backs up all the files that would have been created with the name str.
Definition: OFile.cpp:222
bool ignore_reweight
If we are reusing data are we ignoring the reweighting in that data.
Definition: Analysis.h:54
This is used to create PLMD::Action objects that are run with some set frequency. ...
Definition: ActionPilot.h:39
OFile rfile
The checkpoint file.
Definition: Analysis.h:84
void warning(const std::string &msg)
Issue a warning.
Definition: Action.cpp:201
void parseOutputFile(const std::string &key, std::string &filename)
This is used to read in output file names for analysis methods.
Definition: Analysis.cpp:198
void finalizeWeights(const bool &ignore_weights)
Convert the stored log weights to proper weights.
Definition: Analysis.cpp:257
void readDataFromFile(const std::string &filename)
Read in data from a file.
Definition: Analysis.cpp:176
double retrieveNorm() const
This retrieves the value of norm from the analysis action.
Definition: Analysis.h:167
void requestArguments(const std::vector< Value * > &arg)
Setup the dependencies.
static void registerKeywords(Keywords &keys)
Registers the list of keywords.
bool firstAnalysisDone
Have we analyzed the data for the first time.
Definition: Analysis.h:78
void error(const std::string &msg) const
Crash calculation and print documentation.
Definition: Action.cpp:195
const std::string & getLabel() const
Returns the label.
Definition: Action.h:263
std::vector< std::vector< double > > data
The data we are going to analyze.
Definition: Analysis.h:74
Value * getPntrToArgument(const unsigned n)
Return a pointer to specific argument.
void add(const std::string &t, const std::string &k, const std::string &d)
Add a new keyword of type t with name k and description d.
Definition: Keywords.cpp:167
This is used to create PLMD::Action objects that take the output from some other Action as input...
std::vector< double > args
Tempory vector to store values of arguments.
Definition: Analysis.h:72
std::vector< double > weights
Definition: Analysis.h:76
OFile & addConstantField(const std::string &)
Definition: OFile.cpp:120
double getNormalization() const
Return the normalization constant.
Definition: Analysis.cpp:324
void update()
Update.
Definition: Analysis.cpp:329
Analysis * mydatastash
The Analysis action that we are reusing data from.
Definition: Analysis.h:56
Used to create a PLMD::Action that has some scalar or vectorial output that may or may not have some ...
double simtemp
The temperature at which we are running the calculation.
Definition: Analysis.h:62
int getStride() const
Definition: ActionPilot.cpp:42
void getDomain(std::string &, std::string &) const
Get the domain of the quantity.
Definition: Value.cpp:99
void parse(const std::string &key, T &t)
Parse one keyword as generic type.
Definition: Action.h:273
This class holds the keywords and their documentation.
Definition: Keywords.h:36
IFile & open(const std::string &name)
Opens the file.
Definition: IFile.cpp:90
This class is used to bring the relevant information to the Action constructor.
Definition: Action.h:41
unsigned freq
The frequency with which we are performing analysis.
Definition: Analysis.h:58
bool write_chq
Are we writing a checkpoint file.
Definition: Analysis.h:50
OFile & link(OFile &)
Allows linking this OFile to another one.
Definition: OFile.cpp:71
double getTime() const
Return the present time.
Definition: Action.cpp:173
int printf(const char *fmt,...)
Formatted output with explicit format - a la printf.
Definition: OFile.cpp:82
void close()
Closes the file Should be used only for explicitely opened files.
Definition: FileBase.cpp:140
const std::string & getName() const
Returns the name.
Definition: Action.h:268
Base class for all the input Actions.
Definition: Action.h:60
static void registerKeywords(Keywords &keys)
Register all the relevant keywords for the action.
Definition: Action.cpp:49
static void registerKeywords(Keywords &keys)
Create the keywords for actionPilot.
Definition: ActionPilot.cpp:27
Class for input files.
Definition: IFile.h:40
void use(const std::string &k)
Use one of the reserved keywords.
Definition: Keywords.cpp:154
std::vector< Value * > getArguments()
Overwrite ActionWithArguments getArguments() so that we don't return the bias.
Definition: Analysis.h:172
unsigned getNumberOfDataPoints() const
Return the number of data points.
Definition: Analysis.h:131
double getArgument(const unsigned n) const
Returns the value of an argument.
void reserveFlag(const std::string &k, const bool def, const std::string &d, const bool isvessel=false)
Reserve a flag.
Definition: Keywords.cpp:140
This is the abstract base class to use for implementing new methods for analyzing the trajectory...
Definition: Analysis.h:40
void prepare()
Prepare an Action for calculation This can be used by Action if they need some special preparation be...
Definition: Analysis.cpp:207
unsigned getNumberOfArguments() const
Return the number of arguments (this overwrites the one in ActionWithArguments)
Definition: Analysis.h:161
IFile & scanField(const std::string &, double &)
Read a double field.
Definition: IFile.cpp:121
virtual void performAnalysis()=0
long int getStep() const
Return the present timestep.
Definition: Action.cpp:169
bool exists(const std::string &k) const
Check if there is a keyword with name k.
Definition: Keywords.cpp:239
Class for output files.
Definition: OFile.h:84
bool FileExist(const std::string &path)
Check if the file exists.
Definition: FileBase.cpp:118
OFile & printField(const std::string &, double)
Set the value of a double precision field.
Definition: OFile.cpp:145
Main plumed object.
Definition: Plumed.h:201
bool isPeriodic() const
Check if the value is periodic.
Definition: Value.cpp:75
void calculate()
Calculate an Action.
Definition: Analysis.cpp:211
bool single_run
Are we running only once for the whole trajectory.
Definition: Analysis.h:46
Analysis(const ActionOptions &)
Definition: Analysis.cpp:75
void setBackupString(const std::string &)
Set the string name to be used for automatic backup.
Definition: OFile.cpp:218
double rtemp
The temperature at which we want the histogram.
Definition: Analysis.h:64
bool needeng
Do we need the energy (are we reweighting at a different temperature)
Definition: Analysis.h:66
static void registerKeywords(Keywords &keys)
Definition: Analysis.cpp:55
OFile & open(const std::string &name)
Opens the file using automatic append/backup.
Definition: OFile.cpp:264
const Keywords & keywords
Definition: Action.h:161
std::string ofmt
The format to use in output files.
Definition: Analysis.h:82
double norm
The value of the old normalization constant.
Definition: Analysis.h:80
OFile & setupPrintValue(Value *val)
Used to setup printing of values.
Definition: OFile.cpp:172
void addFlag(const std::string &k, const bool def, const std::string &d)
Add a falg with name k that is by default on if def is true and off if def is false. d should provide a description of the flag.
Definition: Keywords.cpp:193
std::vector< double > logweights
The weights of all the data points.
Definition: Analysis.h:76