Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2015-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionRegister.h"
23 : #include "tools/KernelFunctions.h"
24 : #include "tools/IFile.h"
25 : #include "multicolvar/MultiColvarBase.h"
26 : #include "multicolvar/AtomValuePack.h"
27 : #include "PammObject.h"
28 :
29 : //+PLUMEDOC MCOLVARF PAMM
30 : /*
31 : Probabilistic analysis of molecular motifs.
32 :
33 : Probabilistic analysis of molecular motifs (PAMM) was introduced in this paper \cite pamm.
34 : The essence of this approach involves calculating some large set of collective variables
35 : for a set of atoms in a short trajectory and fitting this data using a Gaussian Mixture Model.
36 : The idea is that modes in these distributions can be used to identify features such as hydrogen bonds or
37 : secondary structure types.
38 :
39 : The assumption within this implementation is that the fitting of the Gaussian mixture model has been
40 : done elsewhere by a separate code. You thus provide an input file to this action which contains the
41 : means, covariance matrices and weights for a set of Gaussian kernels, \f$\{ \phi \}\f$. The values and
42 : derivatives for the following set of quantities is then computed:
43 :
44 : \f[
45 : s_k = \frac{ \phi_k}{ \sum_i \phi_i }
46 : \f]
47 :
48 : Each of the \f$\phi_k\f$ is a Gaussian function that acts on a set of quantities calculated within
49 : a \ref mcolv . These might be \ref TORSIONS, \ref DISTANCES, \ref ANGLES or any one of the many
50 : symmetry functions that are available within \ref mcolv actions. These quantities are then inserted into
51 : the set of \f$n\f$ kernels that are in the the input file. This will be done for multiple sets of values
52 : for the input quantities and a final quantity will be calculated by summing the above \f$s_k\f$ values or
53 : some transformation of the above. This sounds less complicated than it is and is best understood by
54 : looking through the example given below.
55 :
56 : \warning Mixing \ref mcolv actions that are periodic with variables that are not periodic has not been tested
57 :
58 : \par Examples
59 :
60 : In this example I will explain in detail what the following input is computing:
61 :
62 : \plumedfile
63 : #SETTINGS MOLFILE=regtest/basic/rt32/helix.pdb
64 : MOLINFO MOLTYPE=protein STRUCTURE=M1d.pdb
65 : psi: TORSIONS ATOMS1=@psi-2 ATOMS2=@psi-3 ATOMS3=@psi-4
66 : phi: TORSIONS ATOMS1=@phi-2 ATOMS2=@phi-3 ATOMS3=@phi-4
67 : p: PAMM DATA=phi,psi CLUSTERS=clusters.pamm MEAN1={COMPONENT=1} MEAN2={COMPONENT=2}
68 : PRINT ARG=p.mean-1,p.mean-2 FILE=colvar
69 : \endplumedfile
70 :
71 : The best place to start our explanation is to look at the contents of the clusters.pamm file
72 :
73 : \auxfile{clusters.pamm}
74 : #! FIELDS height phi psi sigma_phi_phi sigma_phi_psi sigma_psi_phi sigma_psi_psi
75 : #! SET multivariate von-misses
76 : #! SET kerneltype gaussian
77 : 2.97197455E-0001 -1.91983118E+0000 2.25029540E+0000 2.45960237E-0001 -1.30615381E-0001 -1.30615381E-0001 2.40239117E-0001
78 : 2.29131448E-0002 1.39809354E+0000 9.54585380E-0002 9.61755708E-0002 -3.55657919E-0002 -3.55657919E-0002 1.06147253E-0001
79 : 5.06676398E-0001 -1.09648066E+0000 -7.17867907E-0001 1.40523052E-0001 -1.05385552E-0001 -1.05385552E-0001 1.63290557E-0001
80 : \endauxfile
81 :
82 : This files contains the parameters of two two-dimensional Gaussian functions. Each of these Gaussian kernels has a weight, \f$w_k\f$,
83 : a vector that specifies the position of its center, \f$\mathbf{c}_k\f$, and a covariance matrix, \f$\Sigma_k\f$. The \f$\phi_k\f$ functions that
84 : we use to calculate our PAMM components are thus:
85 :
86 : \f[
87 : \phi_k = \frac{w_k}{N_k} \exp\left( -(\mathbf{s} - \mathbf{c}_k)^T \Sigma^{-1}_k (\mathbf{s} - \mathbf{c}_k) \right)
88 : \f]
89 :
90 : In the above \f$N_k\f$ is a normalization factor that is calculated based on \f$\Sigma\f$. The vector \f$\mathbf{s}\f$ is a vector of quantities
91 : that are calculated by the \ref TORSIONS actions. This vector must be two dimensional and in this case each component is the value of a
92 : torsion angle. If we look at the two \ref TORSIONS actions in the above we are calculating the \f$\phi\f$ and \f$\psi\f$ backbone torsional
93 : angles in a protein (Note the use of \ref MOLINFO to make specification of atoms straightforward). We thus calculate the values of our
94 : 2 \f$ \{ \phi \} \f$ kernels 3 times. The first time we use the \f$\phi\f$ and \f$\psi\f$ angles in the second residue of the protein,
95 : the second time it is the \f$\phi\f$ and \f$\psi\f$ angles of the third residue of the protein and the third time it is the \f$\phi\f$ and \f$\psi\f$ angles
96 : of the fourth residue in the protein. The final two quantities that are output by the print command, p.mean-1 and p.mean-2, are the averages
97 : over these three residues for the quantities:
98 : \f[
99 : s_1 = \frac{ \phi_1}{ \phi_1 + \phi_2 }
100 : \f]
101 : and
102 : \f[
103 : s_2 = \frac{ \phi_2}{ \phi_1 + \phi_2 }
104 : \f]
105 : There is a great deal of flexibility in this input. We can work with, and examine, any number of components, we can use any set of collective variables
106 : and compute these PAMM variables and we can transform the PAMM variables themselves in a large number of different ways when computing these sums.
107 : */
108 : //+ENDPLUMEDOC
109 :
110 : namespace PLMD {
111 : namespace pamm {
112 :
113 : class PAMM : public multicolvar::MultiColvarBase {
114 : private:
115 : PammObject mypamm;
116 : public:
117 : static void registerKeywords( Keywords& keys );
118 : explicit PAMM(const ActionOptions&);
119 : /// We have to overwrite this here
120 : unsigned getNumberOfQuantities() const override;
121 : /// Calculate the weight of this object ( average of input weights )
122 : using PLMD::multicolvar::MultiColvarBase::calculateWeight;
123 : void calculateWeight( multicolvar::AtomValuePack& myatoms );
124 : /// Actually do the calculation
125 : double compute( const unsigned& tindex, multicolvar::AtomValuePack& myatoms ) const override;
126 : /// This returns the position of the central atom
127 : Vector getCentralAtom();
128 : /// Is the variable periodic
129 16 : bool isPeriodic() override {
130 16 : return false;
131 : }
132 : };
133 :
134 13789 : PLUMED_REGISTER_ACTION(PAMM,"PAMM")
135 :
136 6 : void PAMM::registerKeywords( Keywords& keys ) {
137 6 : MultiColvarBase::registerKeywords( keys );
138 12 : keys.add("compulsory","DATA","the multicolvars from which the pamm coordinates are calculated");
139 12 : keys.add("compulsory","CLUSTERS","the name of the file that contains the definitions of all the clusters");
140 12 : keys.add("compulsory","REGULARISE","0.001","don't allow the denominator to be smaller then this value");
141 6 : keys.use("MEAN");
142 6 : keys.use("MORE_THAN");
143 6 : keys.use("SUM");
144 6 : keys.use("LESS_THAN");
145 6 : keys.use("HISTOGRAM");
146 6 : keys.use("MIN");
147 6 : keys.use("MAX");
148 6 : keys.use("LOWEST");
149 6 : keys.use("HIGHEST");
150 6 : keys.use("ALT_MIN");
151 6 : keys.use("BETWEEN");
152 6 : keys.use("MOMENTS");
153 6 : keys.setComponentsIntroduction("When the label of this action is used as the input for a second you are not referring to a scalar quantity as you are in "
154 : "regular collective variables. The label is used to reference the full set of quantities calculated by "
155 : "the action. This is usual when using \\ref multicolvarfunction. Generally when doing this the set of PAMM variables "
156 : "will be referenced using the DATA keyword rather than ARG.\n\n"
157 : "This Action can be used to calculate the following scalar quantities directly from the underlying set of PAMM variables. "
158 : "These quantities are calculated by employing the keywords listed below and they can be referenced elsewhere in the input "
159 : "file by using this Action's label followed by a dot and the name of the quantity. The particular PAMM variable that should "
160 : "be averaged in a MEAN command or transformed by a switching function in a LESS_THAN command is specified using the COMPONENT "
161 : "keyword. COMPONENT=1 refers to the PAMM variable in which the first kernel in your input file is on the numerator, COMPONENT=2 refers to "
162 : "PAMM variable in which the second kernel in the input file is on the numerator and so on. The same quantity can be calculated "
163 : "multiple times for different PAMM components by a single PAMM action. In this case the relevant keyword must appear multiple "
164 : "times on the input line followed by a numerical identifier i.e. MEAN1, MEAN2, ... The quantities calculated when multiple "
165 : "MEAN commands appear on the input line can be reference elsewhere in the input file by using the name of the quantity followed "
166 : "followed by a numerical identifier e.g. <em>label</em>.lessthan-1, <em>label</em>.lessthan-2 etc. Alternatively, you can "
167 : "customize the labels of the quantities by using the LABEL keyword in the description of the keyword.");
168 6 : keys.remove("ALL_INPUT_SAME_TYPE");
169 6 : }
170 :
171 2 : PAMM::PAMM(const ActionOptions& ao):
172 : Action(ao),
173 2 : MultiColvarBase(ao) {
174 : // This builds the lists
175 2 : buildSets();
176 : // Check for reasonable input
177 5 : for(unsigned i=0; i<getNumberOfBaseMultiColvars(); ++i) {
178 3 : if( getBaseMultiColvar(i)->getNumberOfQuantities()!=2 ) {
179 0 : error("cannot use PAMM with " + getBaseMultiColvar(i)->getName() );
180 : }
181 : }
182 :
183 2 : bool mixed=getBaseMultiColvar(0)->isPeriodic();
184 2 : std::vector<bool> pbc( getNumberOfBaseMultiColvars() );
185 2 : std::vector<std::string> valnames( getNumberOfBaseMultiColvars() );
186 2 : std::vector<std::string> min( getNumberOfBaseMultiColvars() ), max( getNumberOfBaseMultiColvars() );
187 5 : for(unsigned i=0; i<getNumberOfBaseMultiColvars(); ++i) {
188 3 : if( getBaseMultiColvar(i)->isPeriodic()!=mixed ) {
189 0 : warning("mixing of periodic and aperiodic base variables in pamm is untested");
190 : }
191 3 : pbc[i]=getBaseMultiColvar(i)->isPeriodic();
192 3 : if( pbc[i] ) {
193 2 : getBaseMultiColvar(i)->retrieveDomain( min[i], max[i] );
194 : }
195 3 : valnames[i]=getBaseMultiColvar(i)->getLabel();
196 : }
197 :
198 : double regulariser;
199 4 : parse("REGULARISE",regulariser);
200 : std::string errorstr, filename;
201 2 : parse("CLUSTERS",filename);
202 2 : mypamm.setup( filename, regulariser, valnames, pbc, min, max, errorstr );
203 2 : if( errorstr.length()>0 ) {
204 0 : error( errorstr );
205 : }
206 4 : }
207 :
208 86 : unsigned PAMM::getNumberOfQuantities() const {
209 86 : return 1 + mypamm.getNumberOfKernels();
210 : }
211 :
212 0 : void PAMM::calculateWeight( multicolvar::AtomValuePack& myatoms ) {
213 : unsigned nvars = getNumberOfBaseMultiColvars();
214 : // Weight of point is average of weights of input colvars?
215 0 : std::vector<double> tval(2);
216 : double ww=0;
217 0 : for(unsigned i=0; i<nvars; ++i) {
218 0 : getInputData( i, false, myatoms, tval );
219 0 : ww+=tval[0];
220 : }
221 0 : myatoms.setValue( 0, ww / static_cast<double>( nvars ) );
222 :
223 0 : if(!doNotCalculateDerivatives() ) {
224 0 : double pref = 1.0 / static_cast<double>( nvars );
225 0 : for(unsigned ivar=0; ivar<nvars; ++ivar) {
226 : // Get the values of derivatives
227 0 : const MultiValue& myder=getInputDerivatives( ivar, false, myatoms );
228 0 : for(unsigned j=0; j<myder.getNumberActive(); ++j) {
229 0 : unsigned jder=myder.getActiveIndex(j);
230 0 : myatoms.addDerivative( 0, jder, pref*myder.getDerivative( 0, jder ) );
231 : }
232 : }
233 : }
234 0 : }
235 :
236 28 : double PAMM::compute( const unsigned& tindex, multicolvar::AtomValuePack& myatoms ) const {
237 : unsigned nvars = getNumberOfBaseMultiColvars();
238 28 : std::vector<std::vector<double> > tderiv( mypamm.getNumberOfKernels() );
239 174 : for(unsigned i=0; i<tderiv.size(); ++i) {
240 146 : tderiv[i].resize( nvars );
241 : }
242 28 : std::vector<double> tval(2), invals( nvars ), vals( mypamm.getNumberOfKernels() );
243 :
244 74 : for(unsigned i=0; i<nvars; ++i) {
245 46 : getInputData( i, false, myatoms, tval );
246 46 : invals[i]=tval[1];
247 : }
248 28 : mypamm.evaluate( invals, vals, tderiv );
249 :
250 : // Now set all values other than the first one
251 : // This is because of some peverse choices in multicolvar
252 146 : for(unsigned i=1; i<vals.size(); ++i) {
253 118 : myatoms.setValue( 1+i, vals[i] );
254 : }
255 :
256 28 : if( !doNotCalculateDerivatives() ) {
257 28 : std::vector<double> mypref( 1 + vals.size() );
258 74 : for(unsigned ivar=0; ivar<nvars; ++ivar) {
259 : // Get the values of the derivatives
260 46 : MultiValue& myder = getInputDerivatives( ivar, false, myatoms );
261 : // And calculate the derivatives
262 318 : for(unsigned i=0; i<vals.size(); ++i) {
263 272 : mypref[1+i] = tderiv[i][ivar];
264 : }
265 : // This is basically doing the chain rule to get the final derivatives
266 46 : splitInputDerivatives( 1, 1, 1+vals.size(), ivar, mypref, myder, myatoms );
267 : // And clear the derivatives
268 46 : myder.clearAll();
269 : }
270 : }
271 :
272 56 : return vals[0];
273 28 : }
274 :
275 0 : Vector PAMM::getCentralAtom() {
276 : // Who knows how this should work
277 0 : plumed_error();
278 : // return Vector(1.0,0.0,0.0);
279 : }
280 :
281 : }
282 : }
|