Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "cltools/CLTool.h"
23 : #include "cltools/CLToolRegister.h"
24 : #include "tools/Tools.h"
25 : #include "tools/Pbc.h"
26 : #include "core/Value.h"
27 : #include "reference/ReferenceConfiguration.h"
28 : #include "PathReparameterization.h"
29 : #include "reference/MetricRegister.h"
30 : #include <cstdio>
31 : #include <string>
32 : #include <vector>
33 : #include <iostream>
34 :
35 : namespace PLMD {
36 : namespace mapping {
37 :
38 : //+PLUMEDOC TOOLS pathtools
39 : /*
40 : pathtools can be used to construct paths from pdb data
41 :
42 : The path CVs in PLUMED are curvilinear coordinates through a high dimensional vector space.
43 : Enhanced sampling calculations are often run using the progress along the paths and the distance from the path as CVs
44 : as this provides a convenient way of defining a reaction coordinate for a complicated process. This method is explained
45 : in the documentation for \ref PATH.
46 :
47 : The path itself is an ordered set of equally-spaced, high-dimensional frames the way in which these frames
48 : should be constructed will depend on the problem in hand. In other words, you will need to understand the reaction
49 : you wish to study in order to select a sensible set of frames to use in your path CV. This tool provides two
50 : methods that may be useful when it comes to constructing paths; namely:
51 :
52 : - A tool that takes in an initial guess path in which the frames are not equally spaced. This tool adjusts the positions
53 : of the frames in order to make them equally spaced so that they can be used as the basis for a path CV.
54 :
55 : - A tool that takes two frames as input and that allows you to return a linear path connecting these two frames. The
56 : output from this method may be useful as an initial guess path. It is arguable that a linear path rather defeats the
57 : purpose of the path CV method, however, as the whole purpose is to be able to define non-linear paths.
58 :
59 : Notice that you can use these two methods and take advantage of all the ways of measuring \ref dists that are available within
60 : PLUMED. The way you do this with each of these tools described above is explained in the example below.
61 :
62 : \par Examples
63 :
64 : The example below shows how you can take a set of unequally spaced frames from a pdb file named in_path.pdb
65 : and use pathtools to make them equally spaced so that they can be used as the basis for a path CV. The file
66 : containing this final path is named final_path.pdb.
67 :
68 : \verbatim
69 : plumed pathtools --path in_path.pdb --metric EUCLIDEAN --out final_path.pdb
70 : \endverbatim
71 :
72 : The example below shows how can create an initial linear path connecting the two pdb frames in start.pdb and
73 : end.pdb. In this case the path output to path.pdb will consist of 6 frames: the initial and final frames that
74 : were contained in start.pdb and end.pdb as well as four equally spaced frames along the vector connecting
75 : start.pdb to end.pdb.
76 :
77 : \verbatim
78 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb
79 : \endverbatim
80 :
81 : Often the idea with path collective variables is to create a path connecting some initial state A to some final state B. You would
82 : in this case have representative configurations from your A and B states defined in the input files to pathtools
83 : that we have called start.pdb and end.pdb in the example above. Furthermore, it may be useful to have a few frames
84 : before your start frame and after your end frame. You can use path tools to create these extended paths as shown below.
85 : In this case the final path would now consist of 8 frames. Four of these frames would lie on the vector connecting state
86 : A to state B, there would be one frame each at start.pdb and end.pdb as well as one frame just before start.pdb and one
87 : frame just after end.pdb. All these frames would be equally spaced.
88 :
89 : \verbatim
90 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb --nframes-before-start 2 --nframes-after-end 2
91 : \endverbatim
92 :
93 : Notice also that when you re-parameterize paths you must choose two frames to fix. Generally you chose to fix the states
94 : that are representative of your states A and B. By default pathtools will fix the first and last frames. You can, however,
95 : change the states to fix by taking advantage of the fixed flag as shown below.
96 :
97 : \verbatim
98 : plumed pathtools --path inpath.pdb --metric EUCLIDEAN --out outpath.pdb --fixed 2,12
99 : \endverbatim
100 :
101 : */
102 : //+ENDPLUMEDOC
103 :
104 : class PathTools :
105 : public CLTool {
106 : public:
107 : static void registerKeywords( Keywords& keys );
108 : explicit PathTools(const CLToolOptions& co );
109 : int main(FILE* in, FILE*out,Communicator& pc);
110 4 : std::string description()const {
111 4 : return "print out a description of the keywords for an action in html";
112 : }
113 : };
114 :
115 13793 : PLUMED_REGISTER_CLTOOL(PathTools,"pathtools")
116 :
117 4595 : void PathTools::registerKeywords( Keywords& keys ) {
118 4595 : CLTool::registerKeywords( keys );
119 9190 : keys.add("atoms","--start","a pdb file that contains the structure for the initial frame of your path");
120 9190 : keys.add("atoms","--end","a pdb file that contains the structure for the final frame of your path");
121 9190 : keys.add("atoms-1","--path","a pdb file that contains an initial path in which the frames are not equally spaced");
122 9190 : keys.add("compulsory","--fixed","0","the frames to fix when constructing the path using --path");
123 9190 : keys.add("compulsory","--metric","the measure to use to calculate the distance between frames");
124 9190 : keys.add("compulsory","--out","the name of the file on which to output your path");
125 9190 : keys.add("compulsory","--arg-fmt","%f","the format to use for argument values in your frames");
126 9190 : keys.add("compulsory","--tolerance","1E-4","the tolerance to use for the algorithm that is used to re-parameterize the path");
127 9190 : keys.add("compulsory","--nframes-before-start","1","the number of frames to include in the path before the first frame");
128 9190 : keys.add("compulsory","--nframes","1","the number of frames between the start and end frames in your path");
129 9190 : keys.add("compulsory","--nframes-after-end","1","the number of frames to put after the last frame of your path");
130 4595 : }
131 :
132 8 : PathTools::PathTools(const CLToolOptions& co ):
133 8 : CLTool(co) {
134 8 : inputdata=commandline;
135 8 : }
136 :
137 4 : int PathTools::main(FILE* in, FILE*out,Communicator& pc) {
138 : std::string mtype;
139 8 : parse("--metric",mtype);
140 : std::string ifilename;
141 8 : parse("--path",ifilename);
142 : std::string ofmt;
143 8 : parse("--arg-fmt",ofmt);
144 : std::string ofilename;
145 8 : parse("--out",ofilename);
146 4 : if( ifilename.length()>0 ) {
147 : std::fprintf(out,"Reparameterising path in file named %s so that all frames are equally spaced \n",ifilename.c_str() );
148 2 : FILE* fp=std::fopen(ifilename.c_str(),"r");
149 : // call fclose when fp goes out of scope
150 : auto deleter=[](FILE* f) {
151 : if(f) {
152 2 : std::fclose(f);
153 : }
154 2 : };
155 : std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp,deleter);
156 : bool do_read=true;
157 : std::vector<std::unique_ptr<ReferenceConfiguration>> frames;
158 24 : while (do_read) {
159 22 : PDB mypdb;
160 : // Read the pdb file
161 22 : do_read=mypdb.readFromFilepointer(fp,false,0.1);
162 22 : if( do_read ) {
163 20 : auto mymsd(metricRegister().create<ReferenceConfiguration>( mtype, mypdb ));
164 20 : frames.emplace_back( std::move(mymsd) );
165 20 : }
166 22 : }
167 : std::vector<unsigned> fixed;
168 4 : parseVector("--fixed",fixed);
169 2 : if( fixed.size()==1 ) {
170 1 : if( fixed[0]!=0 ) {
171 0 : error("input to --fixed should be two integers");
172 : }
173 1 : fixed.resize(2);
174 1 : fixed[0]=0;
175 1 : fixed[1]=frames.size()-1;
176 1 : } else if( fixed.size()==2 ) {
177 1 : if( fixed[0]>(frames.size()-1) || fixed[1]>(frames.size()-1) ) {
178 0 : error("input to --fixed should be two numbers between 0 and the number of frames-1");
179 : }
180 : } else {
181 0 : error("input to --fixed should be two integers");
182 : }
183 : std::vector<AtomNumber> atoms;
184 : std::vector<std::string> arg_names;
185 22 : for(unsigned i=0; i<frames.size(); ++i) {
186 20 : frames[i]->getAtomRequests( atoms);
187 20 : frames[i]->getArgumentRequests( arg_names );
188 : }
189 : // Generate stuff to reparameterize
190 2 : Pbc fake_pbc;
191 : std::vector<std::unique_ptr<Value>> vals;
192 4 : for(unsigned i=0; i<frames[0]->getNumberOfReferenceArguments(); ++i) {
193 2 : vals.emplace_back(Tools::make_unique<Value>());
194 2 : vals[vals.size()-1]->setNotPeriodic();
195 : }
196 :
197 : // temporary pointes used to make the conversion once
198 :
199 2 : auto vals_ptr=Tools::unique2raw(vals);
200 : // And reparameterize
201 2 : PathReparameterization myparam( fake_pbc, vals_ptr, frames );
202 : // And make all points equally spaced
203 : double tol;
204 4 : parse("--tolerance",tol);
205 2 : myparam.reparameterize( fixed[0], fixed[1], tol );
206 :
207 : // Output data on spacings
208 : double mean=0;
209 2 : MultiValue myvpack( 1, frames[0]->getNumberOfReferenceArguments() + 3*frames[0]->getNumberOfReferencePositions() + 9 );
210 2 : ReferenceValuePack mypack( frames[0]->getNumberOfReferenceArguments(), frames[0]->getNumberOfReferencePositions(), myvpack );
211 20 : for(unsigned i=1; i<frames.size(); ++i) {
212 18 : double len = frames[i]->calc( frames[i-1]->getReferencePositions(), fake_pbc, vals_ptr, frames[i-1]->getReferenceArguments(), mypack, false );
213 : printf("FINAL DISTANCE BETWEEN FRAME %u AND %u IS %f \n",i-1,i,len );
214 18 : mean+=len*len;
215 : }
216 2 : printf("SUGGESTED LAMBDA PARAMETER IS THUS %f \n",2.3/(mean/static_cast<double>( frames.size()-1 )) );
217 :
218 : // Delete all the frames
219 2 : OFile ofile;
220 2 : ofile.open(ofilename);
221 : std::vector<std::string> argnames;
222 2 : frames[0]->getArgumentRequests( argnames );
223 : std::vector<AtomNumber> atindices;
224 2 : frames[0]->getAtomRequests( atindices );
225 2 : PDB mypdb;
226 2 : mypdb.setAtomNumbers( atindices );
227 2 : mypdb.setArgumentNames( argnames );
228 22 : for(unsigned i=0; i<frames.size(); ++i) {
229 20 : mypdb.setAtomPositions( frames[i]->getReferencePositions() );
230 40 : for(unsigned j=0; j<argnames.size(); ++j) {
231 20 : mypdb.setArgumentValue( argnames[j], frames[i]->getReferenceArguments()[j] );
232 : }
233 20 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
234 20 : mypdb.print( 10, NULL, ofile, ofmt );
235 : }
236 : // Delete the vals as we don't need them
237 : // for(unsigned i=0; i<vals.size(); ++i) delete vals[i];
238 : // Return as we are done
239 : return 0;
240 10 : }
241 :
242 : // Read initial frame
243 : std::string istart;
244 2 : parse("--start",istart);
245 2 : PDB mystartpdb;
246 : {
247 2 : FILE* fp2=std::fopen(istart.c_str(),"r");
248 : // call fclose when fp goes out of scope
249 : auto deleter=[](FILE* f) {
250 2 : std::fclose(f);
251 2 : };
252 : std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp2,deleter);
253 2 : if( istart.length()==0 ) {
254 0 : error("input is missing use --istart + --iend or --path");
255 : }
256 2 : if( !mystartpdb.readFromFilepointer(fp2,false,0.1) ) {
257 0 : error("could not read fila " + istart);
258 : }
259 : }
260 2 : auto sframe=metricRegister().create<ReferenceConfiguration>( mtype, mystartpdb );
261 :
262 : // Read final frame
263 : std::string iend;
264 2 : parse("--end",iend);
265 2 : PDB myendpdb;
266 : {
267 2 : FILE* fp1=std::fopen(iend.c_str(),"r");
268 : // call fclose when fp goes out of scope
269 : auto deleter=[](FILE* f) {
270 2 : std::fclose(f);
271 2 : };
272 : std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp1,deleter);
273 2 : if( iend.length()==0 ) {
274 0 : error("input is missing using --istart + --iend or --path");
275 : }
276 2 : if( !myendpdb.readFromFilepointer(fp1,false,0.1) ) {
277 0 : error("could not read fila " + iend);
278 : }
279 : }
280 2 : auto eframe=metricRegister().create<ReferenceConfiguration>( mtype, myendpdb );
281 :
282 : // Get atoms and arg requests
283 : std::vector<AtomNumber> atoms;
284 : std::vector<std::string> arg_names;
285 2 : sframe->getAtomRequests( atoms);
286 2 : eframe->getAtomRequests( atoms);
287 2 : sframe->getArgumentRequests( arg_names );
288 2 : eframe->getArgumentRequests( arg_names );
289 :
290 : // Now read in the rest of the instructions
291 : unsigned nbefore, nbetween, nafter;
292 2 : parse("--nframes-before-start",nbefore);
293 2 : parse("--nframes",nbetween);
294 2 : parse("--nframes-after-end",nafter);
295 2 : nbetween++;
296 : std::fprintf(out,"Generating linear path connecting structure in file named %s to structure in file named %s \n",istart.c_str(),iend.c_str() );
297 2 : std::fprintf(out,"A path consisting of %u equally-spaced frames before the initial structure, %u frames between the initial and final structures "
298 : "and %u frames after the final structure will be created \n",nbefore,nbetween,nafter);
299 :
300 : // Create a vector of arguments to use for calculating displacements
301 2 : Pbc fpbc;
302 : std::vector<std::unique_ptr<Value>> args;
303 4 : for(unsigned i=0; i<eframe->getNumberOfReferenceArguments(); ++i) {
304 2 : args.emplace_back(Tools::make_unique<Value>());
305 2 : args[args.size()-1]->setNotPeriodic();
306 : }
307 :
308 : // convert pointer once:
309 2 : auto args_ptr=Tools::unique2raw(args);
310 :
311 : // Calculate the distance between the start and the end
312 2 : MultiValue myvpack( 1, sframe->getNumberOfReferenceArguments() + 3*sframe->getNumberOfReferencePositions() + 9);
313 2 : ReferenceValuePack mypack( sframe->getNumberOfReferenceArguments(), sframe->getNumberOfReferencePositions(), myvpack );
314 2 : sframe->calc( eframe->getReferencePositions(), fpbc, args_ptr, eframe->getReferenceArguments(), mypack, false );
315 : // And the spacing between frames
316 2 : double delr = 1.0 / static_cast<double>( nbetween );
317 : // Calculate the vector connecting the start to the end
318 2 : PDB mypdb;
319 2 : mypdb.setAtomNumbers( sframe->getAbsoluteIndexes() );
320 2 : mypdb.addBlockEnd( sframe->getAbsoluteIndexes().size() );
321 2 : if( sframe->getArgumentNames().size()>0 ) {
322 1 : mypdb.setArgumentNames( sframe->getArgumentNames() );
323 : }
324 4 : Direction mydir(ReferenceConfigurationOptions("DIRECTION"));
325 2 : sframe->setupPCAStorage( mypack );
326 2 : mydir.read( mypdb );
327 2 : mydir.zeroDirection();
328 2 : sframe->extractDisplacementVector( eframe->getReferencePositions(), args_ptr, eframe->getReferenceArguments(), false, mydir );
329 :
330 : // Now create frames
331 2 : OFile ofile;
332 2 : ofile.open(ofilename);
333 : unsigned nframes=0;
334 4 : Direction pos(ReferenceConfigurationOptions("DIRECTION"));
335 2 : pos.read( mypdb );
336 4 : for(int i=0; i<nbefore; ++i) {
337 2 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
338 2 : pos.displaceReferenceConfiguration( -i*delr, mydir );
339 2 : mypdb.setAtomPositions( pos.getReferencePositions() );
340 4 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) {
341 2 : mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
342 : }
343 2 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
344 2 : mypdb.print( 10, NULL, ofile, ofmt );
345 : nframes++;
346 : }
347 8 : for(unsigned i=1; i<nbetween; ++i) {
348 6 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
349 6 : pos.displaceReferenceConfiguration( i*delr, mydir );
350 6 : mypdb.setAtomPositions( pos.getReferencePositions() );
351 16 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) {
352 10 : mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
353 : }
354 6 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
355 6 : mypdb.print( 10, NULL, ofile, ofmt );
356 : nframes++;
357 : }
358 7 : for(unsigned i=0; i<nafter; ++i) {
359 5 : pos.setDirection( eframe->getReferencePositions(), eframe->getReferenceArguments() );
360 5 : pos.displaceReferenceConfiguration( i*delr, mydir );
361 5 : mypdb.setAtomPositions( pos.getReferencePositions() );
362 13 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) {
363 8 : mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
364 : }
365 5 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
366 5 : mypdb.print( 10, NULL, ofile, ofmt );
367 : nframes++;
368 : }
369 :
370 2 : ofile.close();
371 : return 0;
372 10 : }
373 :
374 : } // End of namespace
375 : }
|