Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 by Glen Hocky, New York University on behalf of authors
3 :
4 : The sizeshape module is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU Lesser General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : The sizeshape module is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU Lesser General Public License for more details.
13 :
14 : You should have received a copy of the GNU Lesser General Public License
15 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
16 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
17 : #include "colvar/Colvar.h"
18 : #include "core/ActionRegister.h"
19 : #include "tools/Pbc.h"
20 : #include "tools/File.h" // Input and output from files
21 : #include "tools/Matrix.h" // Linear Algebra operations
22 : #include <sstream>
23 : #include <cmath>
24 : #include "tools/Communicator.h" // All the MPI related stuffs
25 :
26 : namespace PLMD {
27 : namespace sizeshape {
28 :
29 : //+PLUMEDOC COLVAR SIZESHAPE_POSITION_LINEAR_PROJ
30 : /*
31 : Calculates a linear projection in the space of a given reference configurational distribution in size-and-shape space.
32 :
33 : The linear projection is given by:
34 :
35 : $$
36 : l(\mathbf{x}) = \mathbf{v}\cdot(\mathbf{R}\cdot(\mathbf{x}(t) - \vec{{\zeta}}(t)) - \mathbf{\mu}),
37 : $$
38 :
39 : Where $\mathbf{v}$ is a vector of linear coefficients, $\mathbf{x}(t)$ is the configuration at time t, $\vec{\zeta}(t)$ is the difference in the geometric mean of the current configuration and that of the reference configuration $\mathbf{\mu}$. $\vec{\zeta}(t) = \frac{1}{N} \sum_{i=1}^N \vec{x_{i}}(t) - \frac{1}{N} \sum_{i=1}^N \vec{\mu_{i}}(t)$, for N atoms.
40 :
41 : $\mathbf{R}$ is an optimal rotation matrix that minimizes the Mahalanobis distance between the current configuration and reference. $\mathbf{R}$ is obtained by using Kabsch algorithm within the code. The Mahalanobis distance is given as:
42 :
43 : $$
44 : d(\mathbf{x}, \mathbf{\mu}, \mathbf{\Sigma}) = \sqrt{(\mathbf{x}-\mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x}-\mathbf{\mu})}
45 : $$
46 :
47 : where, $\mathbf{\Sigma}^{-1}$ is the $N\times N$ precision matrix. See also [SIZESHAPE_POSITION_MAHA_DIST](SIZESHAPE_POSITION_MAHA_DIST.md) for information about calculating Mahalanobis distance in size-and-shape space.
48 :
49 : Size-and-shape Gaussian Mixture Model (shapeGMM) \cite Heidi-shapeGMM-2022 is a probabilistic clustering technique that is used to perform structural clusteing on ensemble of molecular configurations and to obtain reference $(\mathbf{\mu})$ and precision $(\mathbf{\Sigma}^{-1})$ corresponding to each of the cluster centers.
50 : Please chcek out <a href="https://github.com/mccullaghlab/shapeGMMTorch">shapeGMMTorch-GitHub</a> and <a href="https://pypi.org/project/shapeGMMTorch/"> shapeGMMTorch-PyPI</a> for examples and informations on preforming shapeGMM clustering.
51 :
52 : ##Examples
53 :
54 : In the following example, a group is defined with atom indices of selected atoms and then linear projection is calculated for the given reference, precision and coefficients. Each file is a space separated list of 3N floating point numbers.
55 :
56 : ```plumed
57 : #SETTINGS INPUTFILES=regtest/sizeshape/rt-sizeshape/global_avg.txt
58 : #SETTINGS INPUTFILES=regtest/sizeshape/rt-sizeshape/global_precision.txt
59 : #SETTINGS INPUTFILES=regtest/sizeshape/rt-sizeshape/ld1_scalings.txt
60 :
61 : UNITS LENGTH=A TIME=ps ENERGY=kcal/mol
62 : GROUP ATOMS=18,20,22,31,33,35,44,46,48,57,59,61,70,72,74,83,85,87,96,98,100,109,111 LABEL=ga_list
63 : proj: SIZESHAPE_POSITION_LINEAR_PROJ ...
64 : REFERENCE=regtest/sizeshape/rt-sizeshape/global_avg.txt
65 : PRECISION=regtest/sizeshape/rt-sizeshape/global_precision.txt
66 : COEFFS=regtest/sizeshape/rt-sizeshape/ld1_scalings.txt
67 : GROUP=ga_list
68 : ...
69 :
70 : PRINT ARG=proj STRIDE=1 FILE=COLVAR FMT=%8.8f
71 : ```
72 :
73 : */
74 : //+ENDPLUMEDOC
75 :
76 :
77 : class position_linear_proj : public Colvar {
78 :
79 : private:
80 : bool pbc, serial;
81 : std::string prec_f_name; // precision file name
82 : std::string ref_f_name; // reference file name
83 : std::string coeffs_f_name; // file containing linear coeffs
84 : IFile in_; // create an object of class IFile
85 : Log out_;
86 : Matrix<double> ref_str; // coords of reference
87 : Matrix<double> mobile_str; // coords of mobile
88 : Matrix<double> prec; // precision data
89 : Matrix<double> rotation;
90 : std::vector<double> linear_coeffs; // Linear Coefficients
91 : Matrix<double> derv_numeric;
92 : void readinputs(); // reads the input data
93 : double proj; // projection value
94 : std::vector<AtomNumber> atom_list; // list of atoms
95 : const double SMALL = 1.0E-30;
96 : const double delta = 0.00001;
97 : public:
98 : static void registerKeywords( Keywords& keys );
99 : explicit position_linear_proj(const ActionOptions&);
100 : double determinant(int n, const std::vector<std::vector<double>>* B);
101 : void kabsch_rot_mat(); // gives rotation matrix
102 : double cal_position_linear_proj(); // calculates the linear projection
103 : void numeric_grad(); // calculates the numeric gradient
104 : // active methods:
105 : void calculate() override;
106 : };
107 :
108 : PLUMED_REGISTER_ACTION(position_linear_proj, "SIZESHAPE_POSITION_LINEAR_PROJ")
109 :
110 : // register keywords function
111 7 : void position_linear_proj::registerKeywords( Keywords& keys ) {
112 7 : Colvar::registerKeywords( keys );
113 7 : keys.add("compulsory", "PRECISION", "Precision Matrix (inverse of covariance)." );
114 7 : keys.add("compulsory", "REFERENCE", "Coordinates of the reference structure.");
115 7 : keys.add("atoms","GROUP","Group of atoms being used");
116 7 : keys.add("compulsory", "COEFFS", "Vector of linear coefficients.");
117 7 : keys.addFlag("SERIAL",false,"Perform the calculation in serial, for debug purposes only.");
118 14 : keys.setValueDescription("scalar","the linear projection");
119 7 : }
120 :
121 : // constructor function
122 5 : position_linear_proj::position_linear_proj(const ActionOptions&ao):
123 : PLUMED_COLVAR_INIT(ao),
124 5 : pbc(true),
125 5 : serial(false),
126 5 : proj(0),
127 10 : prec_f_name(""),
128 5 : ref_f_name(""),
129 10 : coeffs_f_name("") { // Note! no comma here in the last line.
130 5 : parseFlag("SERIAL",serial);
131 5 : parseAtomList("GROUP",atom_list);
132 5 : parse("REFERENCE", ref_f_name);
133 5 : parse("PRECISION", prec_f_name);
134 5 : parse("COEFFS", coeffs_f_name);
135 5 : bool nopbc=!pbc;
136 5 : parseFlag("NOPBC",nopbc);
137 5 : pbc=!nopbc;
138 :
139 5 : checkRead();
140 :
141 5 : log.printf(" of %u atoms\n",static_cast<unsigned>(atom_list.size()));
142 120 : for(unsigned int i=0; i<atom_list.size(); ++i) {
143 115 : log.printf(" %d", atom_list[i].serial());
144 : }
145 :
146 5 : if(pbc) {
147 5 : log.printf("\n using periodic boundary conditions\n");
148 : } else {
149 0 : log.printf("\n without periodic boundary conditions\n");
150 : }
151 :
152 5 : addValueWithDerivatives();
153 5 : setNotPeriodic();
154 :
155 5 : requestAtoms(atom_list);
156 :
157 : // call the readinputs() function here
158 5 : readinputs();
159 :
160 5 : }
161 :
162 : // read inputs function
163 5 : void position_linear_proj::readinputs() {
164 : unsigned N=getNumberOfAtoms();
165 : // read ref coords
166 5 : in_.open(ref_f_name);
167 :
168 5 : ref_str.resize(N,3);
169 5 : prec.resize(N,N);
170 5 : derv_numeric.resize(N,3);
171 :
172 : std::string line_, val_;
173 : unsigned c_=0;
174 :
175 120 : while (c_ < N) {
176 115 : in_.getline(line_);
177 : std::vector<std::string> items_;
178 115 : std::stringstream check_(line_);
179 :
180 460 : while(std::getline(check_, val_, ' ')) {
181 345 : items_.push_back(val_);
182 : }
183 460 : for(int i=0; i<3; ++i) {
184 345 : ref_str(c_,i) = std::stold(items_[i]);
185 : }
186 115 : c_ += 1;
187 115 : }
188 5 : in_.close();
189 :
190 : //read precision
191 5 : in_.open(prec_f_name);
192 :
193 : std::string line, val;
194 : unsigned int c = 0;
195 :
196 120 : while(c < N) {
197 115 : in_.getline(line);
198 :
199 : // vector for storing the objects
200 : std::vector<std::string> items;
201 :
202 : // stringstream helps to treat a string like an ifstream!
203 115 : std::stringstream check(line);
204 :
205 2760 : while (std::getline(check, val, ' ')) {
206 2645 : items.push_back(val);
207 : }
208 :
209 2760 : for(unsigned int i=0; i<N; ++i) {
210 2645 : prec(c, i) = std::stold(items[i]);
211 : }
212 :
213 115 : c += 1;
214 :
215 115 : }
216 5 : in_.close();
217 :
218 : // read in the linear coeffs
219 5 : in_.open(coeffs_f_name);
220 : unsigned n_=0;
221 : std::string l_;
222 350 : while (n_ < N*3) {
223 345 : in_.getline(l_);
224 345 : linear_coeffs.push_back(std::stod(l_));
225 345 : n_ += 1;
226 : }
227 5 : linear_coeffs.resize(N*3);
228 :
229 5 : in_.close();
230 :
231 5 : }
232 :
233 :
234 :
235 1430 : double position_linear_proj::determinant(int n, const std::vector<std::vector<double>>* B) {
236 :
237 1430 : std::vector<std::vector<double>> A(n, std::vector<double>(n, 0));
238 : // make a copy first!
239 5720 : for(int i=0; i<n; ++i) {
240 17160 : for(int j=0; j<n; ++j) {
241 12870 : A[i][j] = (*B)[i][j];
242 : }
243 : }
244 :
245 :
246 : // It calculates determinant of a matrix using partial pivoting.
247 :
248 : double det = 1;
249 :
250 : // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
251 4290 : for ( int i = 0; i < n - 1; i++ ) {
252 : // Partial pivot: find row r below with largest element in column i
253 : int r = i;
254 2860 : double maxA = std::abs( A[i][i] );
255 7150 : for ( int k = i + 1; k < n; k++ ) {
256 4290 : double val = std::abs( A[k][i] );
257 4290 : if ( val > maxA ) {
258 : r = k;
259 : maxA = val;
260 : }
261 : }
262 2860 : if ( r != i ) {
263 10010 : for ( int j = i; j < n; j++ ) {
264 7150 : std::swap( A[i][j], A[r][j] );
265 : }
266 2860 : det = -det;
267 : }
268 :
269 : // Row operations to make upper-triangular
270 2860 : double pivot = A[i][i];
271 2860 : if (std::abs( pivot ) < SMALL ) {
272 : return 0.0; // Singular matrix
273 : }
274 :
275 7150 : for ( int r = i + 1; r < n; r++ ) { // On lower rows
276 4290 : double multiple = A[r][i] / pivot; // Multiple of row i to clear element in ith column
277 15730 : for ( int j = i; j < n; j++ ) {
278 11440 : A[r][j] -= multiple * A[i][j];
279 : }
280 : }
281 2860 : det *= pivot; // Determinant is product of diagonal
282 : }
283 :
284 1430 : det *= A[n-1][n-1];
285 :
286 1430 : return det;
287 1430 : }
288 :
289 : // kabsch rotation
290 715 : void position_linear_proj::kabsch_rot_mat() {
291 :
292 : unsigned N=getNumberOfAtoms();
293 :
294 715 : Matrix<double> mobile_str_T(3,N);
295 715 : Matrix<double> prec_dot_ref_str(N,3);
296 715 : Matrix<double> correlation(3,3);
297 :
298 :
299 715 : transpose(mobile_str, mobile_str_T);
300 715 : mult(prec, ref_str, prec_dot_ref_str);
301 715 : mult(mobile_str_T, prec_dot_ref_str, correlation);
302 :
303 :
304 715 : int rw = correlation.nrows();
305 715 : int cl = correlation.ncols();
306 715 : int sz = rw*cl;
307 :
308 : // SVD part (taking from plu2/src/tools/Matrix.h: pseudoInvert function)
309 :
310 715 : std::vector<double> da(sz);
311 : unsigned k=0;
312 :
313 : // Transfer the matrix to the local array
314 2860 : for (int i=0; i<cl; ++i)
315 8580 : for (int j=0; j<rw; ++j) {
316 6435 : da[k++]=static_cast<double>( correlation(j,i) ); // note! its [j][i] not [i][j]
317 : }
318 :
319 715 : int nsv, info, nrows=rw, ncols=cl;
320 : if(rw>cl) {
321 : nsv=cl;
322 : } else {
323 : nsv=rw;
324 : }
325 :
326 : // Create some containers for stuff from single value decomposition
327 715 : std::vector<double> S(nsv);
328 715 : std::vector<double> U(nrows*nrows);
329 715 : std::vector<double> VT(ncols*ncols);
330 715 : std::vector<int> iwork(8*nsv);
331 :
332 : // This optimizes the size of the work array used in lapack singular value decomposition
333 715 : int lwork=-1;
334 715 : std::vector<double> work(1);
335 715 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
336 : //if(info!=0) return info;
337 715 : if(info!=0) {
338 0 : log.printf("info:", info);
339 : }
340 :
341 : // Retrieve correct sizes for work and rellocate
342 715 : lwork=(int) work[0];
343 715 : work.resize(lwork);
344 :
345 : // This does the singular value decomposition
346 715 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
347 : //if(info!=0) return info;
348 715 : if(info!=0) {
349 0 : log.printf("info:", info);
350 : }
351 :
352 :
353 : // get U and VT in form of 2D vector (U_, VT_)
354 715 : std::vector<std::vector<double>> U_(nrows, std::vector<double>(nrows,0));
355 715 : std::vector<std::vector<double>> VT_(ncols, std::vector<double>(ncols,0));
356 :
357 : int c=0;
358 :
359 2860 : for(int i=0; i<nrows; ++i) {
360 8580 : for(int j=0; j<nrows; ++j) {
361 6435 : U_[j][i] = U[c];
362 6435 : c += 1;
363 : }
364 : }
365 : c = 0; // note! its [j][i] not [i][j]
366 2860 : for(int i=0; i<ncols; ++i) {
367 8580 : for(int j=0; j<ncols; ++j) {
368 6435 : VT_[j][i] = VT[c];
369 6435 : c += 1;
370 : }
371 : }
372 : c=0; // note! its [j][i] not [i][j]
373 :
374 :
375 : // calculate determinants
376 715 : double det_u = determinant(nrows, &U_);
377 715 : double det_vt = determinant(ncols, &VT_);
378 :
379 : // check!
380 715 : if (det_u * det_vt < 0.0) {
381 1144 : for(int i=0; i<nrows; ++i) {
382 858 : U_[i][nrows-1] *= -1;
383 : }
384 : }
385 :
386 :
387 : //Matrix<double> rotation(3,3);
388 715 : rotation.resize(3,3);
389 715 : Matrix<double> u(3,3), vt(3,3);
390 2860 : for(int i=0; i<3; ++i) {
391 8580 : for(int j=0; j<3; ++j) {
392 6435 : u(i,j)=U_[i][j];
393 6435 : vt(i,j)=VT_[i][j];
394 : }
395 : }
396 :
397 : // get rotation matrix
398 715 : mult(u, vt, rotation);
399 :
400 1430 : }
401 :
402 :
403 : // calculates linear projection
404 715 : double position_linear_proj::cal_position_linear_proj() {
405 :
406 : unsigned N=getNumberOfAtoms();
407 :
408 715 : Matrix<double> rotated_obj(N,3);
409 : // rotate the object
410 715 : mult(mobile_str, rotation, rotated_obj);
411 :
412 : // compute the displacement
413 715 : std::vector<double> disp(N*3);
414 : unsigned c=0;
415 17160 : for(unsigned int i=0; i<N; ++i) {
416 65780 : for(int j=0; j<3; ++j) {
417 49335 : disp[c] = (rotated_obj(i,j)-ref_str(i,j));
418 49335 : c+=1;
419 : }
420 : }
421 :
422 : //double proj_val = dotProduct(disp, linear_coeffs);
423 : double proj_val = 0.0;
424 50050 : for(unsigned int i=0; i<N*3; ++i) {
425 49335 : proj_val += (linear_coeffs[i]*disp[i]);
426 : }
427 :
428 715 : return proj_val;
429 : }
430 :
431 : // numeric gradient
432 25 : void position_linear_proj::numeric_grad() {
433 : // This function performs numerical derivative.
434 : unsigned N=getNumberOfAtoms();
435 :
436 : unsigned stride;
437 : unsigned rank;
438 25 : if(serial) {
439 : // when using components the parallelisation do not work
440 : stride=1;
441 : rank=0;
442 : } else {
443 25 : stride=comm.Get_size();
444 25 : rank=comm.Get_rank();
445 : }
446 :
447 255 : for(unsigned i=rank; i<N; i+=stride) {
448 920 : for (unsigned j=0; j<3; ++j) {
449 :
450 690 : mobile_str(i,j) += delta;
451 690 : kabsch_rot_mat();
452 690 : derv_numeric(i,j) = ((cal_position_linear_proj() - proj)/delta);
453 :
454 690 : mobile_str(i,j) -= delta;
455 : }
456 :
457 : }
458 :
459 25 : if(!serial) {
460 25 : if(!derv_numeric.getVector().empty()) {
461 25 : comm.Sum(&derv_numeric(0,0), derv_numeric.getVector().size());
462 : }
463 : }
464 :
465 :
466 600 : for(unsigned i=0; i<N; ++i) {
467 575 : Vector vi(derv_numeric(i,0), derv_numeric(i,1), derv_numeric(i,2) );
468 575 : setAtomsDerivatives(i, vi);
469 : }
470 :
471 : // clear the matrix (very important step!!)
472 25 : derv_numeric *= 0;
473 25 : }
474 :
475 :
476 : // calculator
477 25 : void position_linear_proj::calculate() {
478 :
479 25 : if(pbc) {
480 25 : makeWhole();
481 : }
482 : unsigned N=getNumberOfAtoms();
483 :
484 25 : mobile_str.resize(N,3);
485 :
486 : // load the mobile str
487 600 : for(unsigned int i=0; i<N; ++i) {
488 575 : Vector pos=getPosition(i); // const PLMD::Vector
489 2300 : for(unsigned j=0; j<3; ++j) {
490 1725 : mobile_str(i,j) = pos[j];
491 : }
492 : }
493 :
494 : // translating the structure to center of geometry
495 25 : double center_of_geometry[3]= {0.0, 0.0, 0.0};
496 :
497 600 : for(unsigned int i=0; i<N; ++i) {
498 575 : center_of_geometry[0] += mobile_str(i,0);
499 575 : center_of_geometry[1] += mobile_str(i,1);
500 575 : center_of_geometry[2] += mobile_str(i,2);
501 : }
502 :
503 600 : for(unsigned int i=0; i<N; ++i) {
504 2300 : for(int j=0; j<3; ++j) {
505 1725 : mobile_str(i,j) -= (center_of_geometry[j]/N);
506 : }
507 : }
508 :
509 25 : kabsch_rot_mat();
510 25 : proj = cal_position_linear_proj();
511 :
512 25 : numeric_grad();
513 25 : setBoxDerivativesNoPbc();
514 25 : setValue(proj);
515 :
516 :
517 25 : }
518 :
519 : }
520 : }
521 :
522 :
523 :
|