Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2018 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionRegister.h"
23 : #include "core/PlumedMain.h"
24 : #include "core/ActionSet.h"
25 : #include "core/ActionShortcut.h"
26 : #include "core/ActionWithValue.h"
27 :
28 : //+PLUMEDOC FUNCTION MAHALANOBIS_DISTANCE
29 : /*
30 : Calculate the Mahalanobis distance between two points in CV space
31 :
32 : If we have two $n$-dimensional vectors $u$ and $v$ we can calculate the
33 : [Mahalanobis distance](https://en.wikipedia.org/wiki/Mahalanobis_distance) between the two points as
34 :
35 : $$
36 : d = \sqrt{ \sum_{i=1}^n \sum_{j=1}^n m_{ij} (u_i - v_i)(u_j - v_j) }
37 : $$
38 :
39 : which can be expressed in matrix form as:
40 :
41 : $$
42 : d^2 = (u-v)^T M (u-v)
43 : $$
44 :
45 : The inputs below shows an example where this is used to calculate the Mahalanobis distance
46 : between the instaneous values of some torsional angles and some reference values
47 : for these distances. The inverse covriance values are provided in the constant value with label `m`.
48 : In this first example the input values are vectors:
49 :
50 : ```plumed
51 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
52 : c: CONSTANT VALUES=1,2
53 : d: DISTANCE ATOMS1=1,2 ATOMS2=3,4
54 : dd: MAHALANOBIS_DISTANCE ARG1=c ARG2=d METRIC=m
55 : PRINT ARG=dd FILE=colvar
56 : ```
57 :
58 : while this second example does the same thing but uses scalars in input.
59 :
60 : ```plumed
61 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
62 : c1: CONSTANT VALUE=1
63 : d1: DISTANCE ATOMS=1,2
64 : c2: CONSTANT VALUE=2
65 : d2: DISTANCE ATOMS=3,4
66 : dd: MAHALANOBIS_DISTANCE ARG1=c1,c2 ARG2=d1,d2 METRIC=m
67 : PRINT ARG=dd FILE=colvar
68 : ```
69 :
70 : Lastly, note that if you want to calculate the square of the distance rather than the distance you can use
71 : the `SQUARED` flag as shown below:
72 :
73 : ```plumed
74 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
75 : c: CONSTANT VALUES=1,2
76 : d: DISTANCE ATOMS1=1,2 ATOMS2=3,4
77 : dd: MAHALANOBIS_DISTANCE ARG1=c ARG2=d METRIC=m SQUARED
78 : PRINT ARG=dd FILE=colvar
79 : ```
80 :
81 : Calculating the square of the distance is slightly cheapter than computing the distance as you avoid taking the square root.
82 :
83 : ## Dealing with periodic variables
84 :
85 : When you are calculating a distance from a reference point you need to be careful when the input variables
86 : are periodic. If you are calculating the distance using the [EUCLIDEAN_DISTANCE](EUCLIDEAN_DISTANCE.md) and
87 : [NORMALIZED_EUCLIDEAN_DISTANCE](NORMALIZED_EUCLIDEAN_DISTANCE.md) commands this is not a problem. The problems are
88 : specific to the Mahalanobis distance command and have been resolved in the papers that are cited below by defining
89 : the following alternatative to the Mahalanobis distance:
90 :
91 : $$
92 : d^2 = 2\sum_{i=1}^n m_{ii} \left[ 1 - \cos\left( \frac{2\pi(u_i-v_i)}{P_i} \right) \right] + \sum_{i\ne j} m_{ij} \sin\left( \frac{2\pi(u_i-v_i)}{P_i} \right) \sin\left( \frac{2\pi(u_j-v_j)}{P_j} \right)
93 : $$
94 :
95 : In this expression, $P_i$ indicates the periodicity of the domain for variable $i$. If you would like to compute this
96 : distance with PLUMED you use the `VON_MISSES` shown below:
97 :
98 : ```plumed
99 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
100 : c: CONSTANT VALUES=1,2
101 : d: TORSION ATOMS1=1,2,3,4 ATOMS2=5,6,7,8
102 : dd: MAHALANOBIS_DISTANCE ARG1=c ARG2=d METRIC=m VON_MISSES
103 : PRINT ARG=dd FILE=colvar
104 : ```
105 :
106 : ## Calculating multiple distances
107 :
108 : Suppose that we now have $m$ reference configurations we can define the following $m$ distances
109 : from these reference configurations:
110 :
111 : $$
112 : d_j^2 = (u-v_j)^T M (u-v_j)
113 : $$
114 :
115 : Lets suppose that we put the $m$, $n$-dimensional $(u-v_j)$ vectors in this expression into a
116 : $n\times m$ matrix, $A$, by using the [DISPLACEMENT](DISPLACEMENT.md) command. It is then
117 : straightforward to show that the $d_j^2$ values in the above expression are the diagonal
118 : elements of the matrix product $A^T M A$.
119 :
120 : We can use this idea to calculate multiple MAHALANOBIS_DISTANCE values in the following inputs.
121 : This first example calculates the three distances between the instaneoues values of two torsions
122 : and three reference configurations.
123 :
124 : ```plumed
125 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
126 : ref_psi: CONSTANT VALUES=2.25,1.3,-1.5
127 : ref_phi: CONSTANT VALUES=-1.91,-0.6,2.4
128 :
129 : psi: TORSION ATOMS=1,2,3,4
130 : phi: TORSION ATOMS=13,14,15,16
131 :
132 : dd: MAHALANOBIS_DISTANCE ARG2=psi,phi ARG1=ref_psi,ref_phi METRIC=m
133 : PRINT ARG=dd FILE=colvar
134 : ```
135 :
136 : This section example calculates the three distances between a single reference value for the two
137 : torsions and three instances of this pair of torsions.
138 :
139 : ```plumed
140 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
141 : ref_psi: CONSTANT VALUES=2.25
142 : ref_phi: CONSTANT VALUES=-1.91
143 :
144 : psi: TORSION ATOMS1=1,2,3,4 ATOMS2=5,6,7,8 ATOMS3=9,10,11,12
145 : phi: TORSION ATOMS1=13,14,15,16 ATOMS2=17,18,19,20 ATOMS3=21,22,23,24
146 :
147 : dd: MAHALANOBIS_DISTANCE ARG1=psi,phi ARG2=ref_psi,ref_phi METRIC=m
148 : PRINT ARG=dd FILE=colvar
149 : ```
150 :
151 : This final example then computes three distances between three pairs of torsional angles and threee
152 : reference values for these three values.
153 :
154 : ```plumed
155 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
156 : ref_psi: CONSTANT VALUES=2.25,1.3,-1.5
157 : ref_phi: CONSTANT VALUES=-1.91,-0.6,2.4
158 :
159 : psi: TORSION ATOMS1=1,2,3,4 ATOMS2=5,6,7,8 ATOMS3=9,10,11,12
160 : phi: TORSION ATOMS1=13,14,15,16 ATOMS2=17,18,19,20 ATOMS3=21,22,23,24
161 :
162 : dd: MAHALANOBIS_DISTANCE ARG1=psi,phi ARG2=ref_psi,ref_phi METRIC=m
163 : PRINT ARG=dd FILE=colvar
164 : ```
165 :
166 : Notice, finally, that you can also calculate multiple distances if you use the `VON_MISSES` option:
167 :
168 : ```plumed
169 : m: CONSTANT VALUES=2.45960237E-0001,-1.30615381E-0001,-1.30615381E-0001,2.40239117E-0001 NROWS=2 NCOLS=2
170 : ref_psi: CONSTANT VALUES=2.25,1.3,-1.5
171 : ref_phi: CONSTANT VALUES=-1.91,-0.6,2.4
172 :
173 : psi: TORSION ATOMS1=1,2,3,4 ATOMS2=5,6,7,8 ATOMS3=9,10,11,12
174 : phi: TORSION ATOMS1=13,14,15,16 ATOMS2=17,18,19,20 ATOMS3=21,22,23,24
175 :
176 : dd: MAHALANOBIS_DISTANCE ARG1=psi,phi ARG2=ref_psi,ref_phi METRIC=m VON_MISSES
177 : PRINT ARG=dd FILE=colvar
178 : ```
179 :
180 : !!! note "scalars must be specified in ARG2"
181 :
182 : If you use a mixture of vectors are scalars when specifying the input to to this action the
183 : vectors should be passed using the ARG1 keyword and the scalars must be passed in the ARG2 keyword
184 : as is done in the example inputs above.
185 :
186 : */
187 : //+ENDPLUMEDOC
188 :
189 : namespace PLMD {
190 : namespace refdist {
191 :
192 : class MahalanobisDistance : public ActionShortcut {
193 : public:
194 : static void registerKeywords( Keywords& keys );
195 : explicit MahalanobisDistance(const ActionOptions&ao);
196 : };
197 :
198 : PLUMED_REGISTER_ACTION(MahalanobisDistance,"MAHALANOBIS_DISTANCE")
199 :
200 21 : void MahalanobisDistance::registerKeywords( Keywords& keys ) {
201 21 : ActionShortcut::registerKeywords(keys);
202 21 : keys.add("compulsory","ARG1","The point that we are calculating the distance from");
203 21 : keys.add("compulsory","ARG2","The point that we are calculating the distance to");
204 21 : keys.add("compulsory","METRIC","The inverse covariance matrix that should be used when calculating the distance");
205 21 : keys.addFlag("SQUARED",false,"The squared distance should be calculated");
206 21 : keys.addFlag("VON_MISSES",false,"Compute the mahalanobis distance in a way that is more sympathetic to the periodic boundary conditions");
207 42 : keys.setValueDescription("scalar/vector","the Mahalanobis distances between the input vectors");
208 21 : keys.needsAction("DISPLACEMENT");
209 21 : keys.needsAction("CUSTOM");
210 21 : keys.needsAction("OUTER_PRODUCT");
211 21 : keys.needsAction("TRANSPOSE");
212 21 : keys.needsAction("MATRIX_PRODUCT_DIAGONAL");
213 21 : keys.needsAction("CONSTANT");
214 21 : keys.needsAction("MATRIX_VECTOR_PRODUCT");
215 21 : keys.needsAction("MATRIX_PRODUCT");
216 21 : keys.needsAction("COMBINE");
217 21 : keys.addDOI("10.1073/pnas.1011511107");
218 21 : keys.addDOI("10.1021/acs.jctc.7b00993");
219 21 : }
220 :
221 12 : MahalanobisDistance::MahalanobisDistance( const ActionOptions& ao):
222 : Action(ao),
223 12 : ActionShortcut(ao) {
224 : std::string arg1, arg2, metstr;
225 12 : parse("ARG1",arg1);
226 12 : parse("ARG2",arg2);
227 12 : parse("METRIC",metstr);
228 : // Check on input metric
229 12 : ActionWithValue* mav=plumed.getActionSet().selectWithLabel<ActionWithValue*>( metstr );
230 12 : if( !mav ) {
231 0 : error("could not find action named " + metstr + " to use for metric");
232 : }
233 12 : if( mav->copyOutput(0)->getRank()!=2 ) {
234 0 : error("metric has incorrect rank");
235 : }
236 :
237 24 : readInputLine( getShortcutLabel() + "_diff: DISPLACEMENT ARG1=" + arg1 + " ARG2=" + arg2 );
238 24 : readInputLine( getShortcutLabel() + "_diffT: TRANSPOSE ARG=" + getShortcutLabel() + "_diff");
239 : bool von_miss, squared;
240 12 : parseFlag("VON_MISSES",von_miss);
241 12 : parseFlag("SQUARED",squared);
242 12 : if( von_miss ) {
243 7 : unsigned nrows = mav->copyOutput(0)->getShape()[0];
244 7 : if( mav->copyOutput(0)->getShape()[1]!=nrows ) {
245 0 : error("metric is not symmetric");
246 : }
247 : // Create a matrix that can be used to compute the off diagonal elements
248 : std::string valstr, nrstr;
249 7 : Tools::convert( mav->copyOutput(0)->get(0), valstr );
250 7 : Tools::convert( nrows, nrstr );
251 7 : std::string diagmet = getShortcutLabel() + "_diagmet: CONSTANT VALUES=" + valstr;
252 14 : std::string offdiagmet = getShortcutLabel() + "_offdiagmet: CONSTANT NROWS=" + nrstr + " NCOLS=" + nrstr + " VALUES=0";
253 21 : for(unsigned i=0; i<nrows; ++i) {
254 42 : for(unsigned j=0; j<nrows; ++j) {
255 28 : Tools::convert( mav->copyOutput(0)->get(i*nrows+j), valstr );
256 28 : if( i==j && i>0 ) {
257 : offdiagmet += ",0";
258 14 : diagmet += "," + valstr;
259 21 : } else if( i!=j ) {
260 28 : offdiagmet += "," + valstr;
261 : }
262 : }
263 : }
264 7 : readInputLine( diagmet );
265 7 : readInputLine( offdiagmet );
266 : // Compute distances scaled by periods
267 7 : ActionWithValue* av=plumed.getActionSet().selectWithLabel<ActionWithValue*>( getShortcutLabel() + "_diff" );
268 7 : plumed_assert( av );
269 7 : if( !av->copyOutput(0)->isPeriodic() ) {
270 0 : error("VON_MISSES only works with periodic variables");
271 : }
272 : std::string min, max;
273 7 : av->copyOutput(0)->getDomain(min,max);
274 14 : readInputLine( getShortcutLabel() + "_scaled: CUSTOM ARG=" + getShortcutLabel() + "_diffT FUNC=2*pi*x/(" + max +"-" + min + ") PERIODIC=NO");
275 : // We start calculating off-diagonal elements by computing the sines of the scaled differences (this is a column vector)
276 14 : readInputLine( getShortcutLabel() + "_sinediffT: CUSTOM ARG=" + getShortcutLabel() + "_scaled FUNC=sin(x) PERIODIC=NO");
277 : // Transpose sines to get a row vector
278 14 : readInputLine( getShortcutLabel() + "_sinediff: TRANSPOSE ARG=" + getShortcutLabel() + "_sinediffT");
279 : // Compute the off diagonal elements
280 7 : ActionWithValue* avs=plumed.getActionSet().selectWithLabel<ActionWithValue*>( getShortcutLabel() + "_sinediffT" );
281 7 : plumed_assert( avs && avs->getNumberOfComponents()==1 );
282 7 : if( (avs->copyOutput(0))->getRank()==1 ) {
283 0 : readInputLine( getShortcutLabel() + "_matvec: MATRIX_VECTOR_PRODUCT ARG=" + metstr + "," + getShortcutLabel() +"_sinediffT");
284 : } else {
285 14 : readInputLine( getShortcutLabel() + "_matvec: MATRIX_PRODUCT ARG=" + getShortcutLabel() + "_offdiagmet," + getShortcutLabel() +"_sinediffT");
286 : }
287 14 : readInputLine( getShortcutLabel() + "_offdiag: MATRIX_PRODUCT_DIAGONAL ARG=" + getShortcutLabel() + "_sinediff," + getShortcutLabel() +"_matvec");
288 : // Sort out the metric for the diagonal elements
289 7 : std::string metstr2 = getShortcutLabel() + "_diagmet";
290 : // If this is a matrix we need create a matrix to multiply by
291 7 : if( av->copyOutput(0)->getShape()[0]>1 ) {
292 : // Create some ones
293 7 : std::string ones=" VALUES=1";
294 21 : for(unsigned i=1; i<av->copyOutput(0)->getShape()[0]; ++i ) {
295 : ones += ",1";
296 : }
297 14 : readInputLine( getShortcutLabel() + "_ones: CONSTANT " + ones );
298 : // Now do some multiplication to create a matrix that can be multiplied by our "inverse variance" vector
299 14 : readInputLine( getShortcutLabel() + "_" + metstr + ": OUTER_PRODUCT ARG=" + metstr2 + "," + getShortcutLabel() + "_ones");
300 14 : metstr2 = getShortcutLabel() + "_" + metstr;
301 : }
302 : // Compute the diagonal elements
303 14 : readInputLine( getShortcutLabel() + "_prod: CUSTOM ARG=" + getShortcutLabel() + "_scaled," + metstr2 + " FUNC=2*(1-cos(x))*y PERIODIC=NO");
304 : std::string ncstr;
305 7 : Tools::convert( nrows, ncstr );
306 7 : Tools::convert( av->copyOutput(0)->getShape()[0], nrstr );
307 7 : std::string ones=" VALUES=1";
308 42 : for(unsigned i=1; i<av->copyOutput(0)->getNumberOfValues(); ++i) {
309 : ones += ",1";
310 : }
311 14 : readInputLine( getShortcutLabel() + "_matones: CONSTANT NROWS=" + nrstr + " NCOLS=" + ncstr + ones );
312 14 : readInputLine( getShortcutLabel() + "_diag: MATRIX_PRODUCT_DIAGONAL ARG=" + getShortcutLabel() + "_matones," + getShortcutLabel() + "_prod");
313 : // Sum everything
314 7 : if( !squared ) {
315 0 : readInputLine( getShortcutLabel() + "_2: COMBINE ARG=" + getShortcutLabel() + "_offdiag," + getShortcutLabel() + "_diag PERIODIC=NO");
316 : } else {
317 14 : readInputLine( getShortcutLabel() + ": COMBINE ARG=" + getShortcutLabel() + "_offdiag," + getShortcutLabel() + "_diag PERIODIC=NO");
318 : }
319 : } else {
320 5 : ActionWithValue* av=plumed.getActionSet().selectWithLabel<ActionWithValue*>( getShortcutLabel() + "_diffT" );
321 5 : plumed_assert( av && av->getNumberOfComponents()==1 );
322 5 : if( (av->copyOutput(0))->getRank()==1 ) {
323 8 : readInputLine( getShortcutLabel() + "_matvec: MATRIX_VECTOR_PRODUCT ARG=" + metstr + "," + getShortcutLabel() +"_diffT");
324 : } else {
325 2 : readInputLine( getShortcutLabel() + "_matvec: MATRIX_PRODUCT ARG=" + metstr + "," + getShortcutLabel() +"_diffT");
326 : }
327 5 : std::string olab = getShortcutLabel();
328 5 : if( !squared ) {
329 : olab += "_2";
330 : }
331 10 : readInputLine( olab + ": MATRIX_PRODUCT_DIAGONAL ARG=" + getShortcutLabel() + "_diff," + getShortcutLabel() +"_matvec");
332 : }
333 12 : if( !squared ) {
334 10 : readInputLine( getShortcutLabel() + ": CUSTOM ARG=" + getShortcutLabel() + "_2 FUNC=sqrt(x) PERIODIC=NO");
335 : }
336 12 : }
337 :
338 : }
339 : }
|