Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2020 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "KernelFunctions.h"
23 : #include "IFile.h"
24 : #include <iostream>
25 : #include <cmath>
26 :
27 : namespace PLMD {
28 :
29 : //+PLUMEDOC INTERNAL kernelfunctions
30 : /*
31 : Functions that are used to construct histograms
32 :
33 : Constructing histograms is something you learned to do relatively early in life. You perform an experiment a number of times,
34 : count the number of times each result comes up and then draw a bar graph that describes how often each of the results came up.
35 : This only works when there are a finite number of possible results. If the result a number between 0 and 1 the bar chart is
36 : less easy to draw as there are as many possible results as there are numbers between zero and one - an infinite number.
37 : To resolve this problem we replace probability, \f$P\f$ with probability density, \f$\pi\f$, and write the probability of getting
38 : a number between \f$a\f$ and \f$b\f$ as:
39 :
40 : \f[
41 : P = \int_{a}^b \textrm{d}x \pi(x)
42 : \f]
43 :
44 : To calculate probability densities from a set of results we use a process called kernel density estimation.
45 : Histograms are accumulated by adding up kernel functions, \f$K\f$, with finite spatial extent, that integrate to one.
46 : These functions are centered on each of the \f$n\f$-dimensional data points, \f$\mathbf{x}_i\f$. The overall effect of this
47 : is that each result we obtain in our experiments contributes to the probability density in a finite sized region of the space.
48 :
49 : Expressing all this mathematically in kernel density estimation we write the probability density as:
50 :
51 : \f[
52 : \pi(\mathbf{x}) = \sum_i K\left[ (\mathbf{x} - \mathbf{x}_i)^T \Sigma (\mathbf{x} - \mathbf{x}_i) \right]
53 : \f]
54 :
55 : where \f$\Sigma\f$ is an \f$n \times n\f$ matrix called the bandwidth that controls the spatial extent of
56 : the kernel. Whenever we accumulate a histogram (e.g. in \ref HISTOGRAM or in \ref METAD) we use this
57 : technique.
58 :
59 : There is thus some flexibility in the particular function we use for \f$K[\mathbf{r}]\f$ in the above.
60 : The following variants are available.
61 :
62 : <table align=center frame=void width=95%% cellpadding=5%%>
63 : <tr>
64 : <td> TYPE </td> <td> FUNCTION </td>
65 : </tr> <tr>
66 : <td> gaussian </td> <td> \f$f(r) = \frac{1}{(2 \pi)^{n} \sqrt{|\Sigma^{-1}|}} \exp\left(-0.5 r^2 \right)\f$ </td>
67 : </tr> <tr>
68 : <td> truncated-gaussian </td> <td> \f$f(r) = \frac{1}{(2 \pi)^{n} \sqrt{|\Sigma^{-1}|} \left(\frac{\mathrm{erf}(-6.25/sqrt{2}) - \mathrm{erf}(-6.25/sqrt{2})}{2}\right)^n} \exp\left(-0.5 r^2 \right)\f$ </td>
69 : </tr> <tr>
70 : <td> triangular </td> <td> \f$f(r) = \frac{3}{V} ( 1 - | r | )H(1-|r|) \f$ </td>
71 : </tr> <tr>
72 : <td> uniform </td> <td> \f$f(r) = \frac{1}{V}H(1-|r|)\f$ </td>
73 : </tr>
74 : </table>
75 :
76 : In the above \f$H(y)\f$ is a function that is equal to one when \f$y>0\f$ and zero when \f$y \le 0\f$. \f$n\f$ is
77 : the dimensionality of the vector \f$\mathbf{x}\f$ and \f$V\f$ is the volume of an ellipse in an \f$n\f$ dimensional
78 : space which is given by:
79 :
80 : \f{eqnarray*}{
81 : V &=& | \Sigma^{-1} | \frac{ \pi^{\frac{n}{2}} }{\left( \frac{n}{2} \right)! } \qquad \textrm{for even} \quad n \\
82 : V &=& | \Sigma^{-1} | \frac{ 2^{\frac{n+1}{2}} \pi^{\frac{n-1}{2}} }{ n!! }
83 : \f}
84 :
85 : In \ref METAD the normalization constants are ignored so that the value of the function at \f$r=0\f$ is equal
86 : to one. In addition in \ref METAD we must be able to differentiate the bias in order to get forces. This limits
87 : the kernels we can use in this method. Notice also that Gaussian kernels should have infinite support. When used
88 : with grids, however, they are assumed to only be non-zero over a finite range. The difference between the
89 : truncated-gaussian and regular gaussian is that the truncated gaussian is scaled so that its integral over the grid
90 : is equal to one when it is normalized. The integral of a regular gaussian when it is evaluated on a grid will be
91 : slightly less that one because of the truncation of a function that should have infinite support.
92 : */
93 : //+ENDPLUMEDOC
94 :
95 12 : KernelFunctions::KernelFunctions( const std::string& input ) {
96 24 : std::vector<std::string> data=Tools::getWords(input);
97 : std::string name=data[0];
98 : data.erase(data.begin());
99 :
100 : std::vector<double> at;
101 24 : bool foundc = Tools::parseVector(data,"CENTER",at);
102 12 : if(!foundc) plumed_merror("failed to find center keyword in definition of kernel");
103 : std::vector<double> sig;
104 24 : bool founds = Tools::parseVector(data,"SIGMA",sig);
105 12 : if(!founds) plumed_merror("failed to find sigma keyword in definition of kernel");
106 :
107 24 : bool multi=false; Tools::parseFlag(data,"MULTIVARIATE",multi);
108 24 : bool vonmises=false; Tools::parseFlag(data,"VON-MISSES",vonmises);
109 12 : if( center.size()==1 && multi ) plumed_merror("one dimensional kernel cannot be multivariate");
110 12 : if( center.size()==1 && vonmises ) plumed_merror("one dimensional kernal cannot be von-misses");
111 12 : if( center.size()==1 && sig.size()!=1 ) plumed_merror("size mismatch between center size and sigma size");
112 12 : if( multi && center.size()>1 && sig.size()!=0.5*center.size()*(center.size()-1) ) plumed_merror("size mismatch between center size and sigma size");
113 24 : if( !multi && center.size()>1 && sig.size()!=center.size() ) plumed_merror("size mismatch between center size and sigma size");
114 :
115 : double h;
116 36 : bool foundh = Tools::parse(data,"HEIGHT",h);
117 12 : if( !foundh) h=1.0;
118 :
119 12 : if( multi ) setData( at, sig, name, "MULTIVARIATE", h );
120 12 : else if( vonmises ) setData( at, sig, name, "VON-MISSES", h );
121 24 : else setData( at, sig, name, "DIAGONAL", h );
122 12 : }
123 :
124 30220 : KernelFunctions::KernelFunctions( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const std::string& mtype, const double& w ) {
125 30220 : setData( at, sig, type, mtype, w );
126 30220 : }
127 :
128 0 : KernelFunctions::KernelFunctions( const KernelFunctions* in ):
129 0 : dtype(in->dtype),
130 0 : ktype(in->ktype),
131 : center(in->center),
132 : width(in->width),
133 0 : height(in->height)
134 : {
135 0 : }
136 :
137 30232 : void KernelFunctions::setData( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const std::string& mtype, const double& w ) {
138 :
139 30232 : height=w;
140 259820 : center.resize( at.size() ); for(unsigned i=0; i<at.size(); ++i) center[i]=at[i];
141 268834 : width.resize( sig.size() ); for(unsigned i=0; i<sig.size(); ++i) width[i]=sig[i];
142 30232 : if( mtype=="MULTIVARIATE" ) dtype=multi;
143 25820 : else if( mtype=="VON-MISSES" ) dtype=vonmises;
144 25811 : else if( mtype=="DIAGONAL" ) dtype=diagonal;
145 0 : else plumed_merror(mtype + " is not a valid metric type");
146 :
147 : // Setup the kernel type
148 60456 : if(type=="GAUSSIAN" || type=="gaussian" ) {
149 30228 : ktype=gaussian;
150 8 : } else if(type=="TRUNCATED-GAUSSIAN" || type=="truncated-gaussian" ) {
151 0 : ktype=truncatedgaussian;
152 8 : } else if(type=="UNIFORM" || type=="uniform") {
153 0 : ktype=uniform;
154 4 : } else if(type=="TRIANGULAR" || type=="triangular") {
155 4 : ktype=triangular;
156 : } else {
157 0 : plumed_merror(type+" is an invalid kernel type\n");
158 : }
159 30232 : }
160 :
161 24625 : void KernelFunctions::normalize( const std::vector<Value*>& myvals ) {
162 :
163 : double det=1.;
164 : unsigned ncv=ndim();
165 24625 : if(dtype==diagonal) {
166 268828 : for(unsigned i=0; i<width.size(); ++i) det*=width[i]*width[i];
167 53 : } else if(dtype==multi) {
168 44 : Matrix<double> mymatrix( getMatrix() ), myinv( ncv, ncv );
169 44 : Invert(mymatrix,myinv); double logd;
170 44 : logdet( myinv, logd );
171 44 : det=std::exp(logd);
172 : }
173 24625 : if( dtype==diagonal || dtype==multi ) {
174 : double volume;
175 24616 : if( ktype==gaussian ) {
176 24616 : volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 );
177 0 : } else if( ktype==truncatedgaussian ) {
178 : // This makes it so the gaussian integrates to one over the range over which it has support
179 : const double DP2CUTOFF=sqrt(6.25);
180 0 : volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 ) * pow( 0.5 * ( erf(DP2CUTOFF) - erf(-DP2CUTOFF) ), ncv);
181 0 : } else if( ktype==uniform || ktype==triangular ) {
182 0 : if( ncv%2==1 ) {
183 : double dfact=1;
184 0 : for(unsigned i=1; i<ncv; i+=2) dfact*=static_cast<double>(i);
185 0 : volume=( pow( pi, (ncv-1)/2 ) ) * ( pow( 2., (ncv+1)/2 ) ) / dfact;
186 : } else {
187 : double fact=1.;
188 0 : for(unsigned i=1; i<ncv/2; ++i) fact*=static_cast<double>(i);
189 0 : volume=pow( pi,ncv/2 ) / fact;
190 : }
191 0 : if(ktype==uniform) volume*=det;
192 0 : else if(ktype==triangular) volume*=det / 3.;
193 : } else {
194 0 : plumed_merror("not a valid kernel type");
195 : }
196 24616 : height /= volume;
197 24616 : return;
198 : }
199 9 : plumed_assert( dtype==vonmises && ktype==gaussian );
200 : // Now calculate determinant for aperiodic variables
201 : unsigned naper=0;
202 41 : for(unsigned i=0; i<ncv; ++i) {
203 32 : if( !myvals[i]->isPeriodic() ) naper++;
204 : }
205 : // Now construct sub matrix
206 : double volume=1;
207 9 : if( naper>0 ) {
208 : unsigned isub=0;
209 2 : Matrix<double> mymatrix( getMatrix() ), mysub( naper, naper );
210 6 : for(unsigned i=0; i<ncv; ++i) {
211 4 : if( myvals[i]->isPeriodic() ) continue;
212 : unsigned jsub=0;
213 6 : for(unsigned j=0; j<ncv; ++j) {
214 4 : if( myvals[j]->isPeriodic() ) continue;
215 2 : mysub( isub, jsub ) = mymatrix( i, j ); jsub++;
216 : }
217 2 : isub++;
218 : }
219 : Matrix<double> myisub( naper, naper ); double logd;
220 2 : Invert( mysub, myisub ); logdet( myisub, logd );
221 2 : det=std::exp(logd);
222 2 : volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 );
223 : }
224 :
225 : // Calculate volume of periodic variables
226 : unsigned nper=0;
227 41 : for(unsigned i=0; i<ncv; ++i) {
228 32 : if( myvals[i]->isPeriodic() ) nper++;
229 : }
230 :
231 : // Now construct sub matrix
232 9 : if( nper>0 ) {
233 : unsigned isub=0;
234 7 : Matrix<double> mymatrix( getMatrix() ), mysub( nper, nper );
235 35 : for(unsigned i=0; i<ncv; ++i) {
236 28 : if( !myvals[i]->isPeriodic() ) continue;
237 : unsigned jsub=0;
238 70 : for(unsigned j=0; j<ncv; ++j) {
239 56 : if( !myvals[j]->isPeriodic() ) continue;
240 28 : mysub( isub, jsub ) = mymatrix( i, j ); jsub++;
241 : }
242 14 : isub++;
243 : }
244 : Matrix<double> eigvec( nper, nper );
245 7 : std::vector<double> eigval( nper );
246 7 : diagMat( mysub, eigval, eigvec );
247 : unsigned iper=0; volume=1;
248 35 : for(unsigned i=0; i<ncv; ++i) {
249 28 : if( myvals[i]->isPeriodic() ) {
250 56 : volume *= myvals[i]->getMaxMinusMin()*Tools::bessel0(eigval[iper])*std::exp(-eigval[iper]);
251 14 : iper++;
252 : }
253 : }
254 : }
255 9 : height /= volume;
256 : }
257 :
258 10277 : double KernelFunctions::getCutoff( const double& width ) const {
259 : const double DP2CUTOFF=6.25;
260 10277 : if( ktype==gaussian || ktype==truncatedgaussian ) return sqrt(2.0*DP2CUTOFF)*width;
261 0 : else if(ktype==triangular ) return width;
262 0 : else if(ktype==uniform) return width;
263 0 : else plumed_merror("No valid kernel type");
264 : return 0.0;
265 : }
266 :
267 5132 : std::vector<double> KernelFunctions::getContinuousSupport( ) const {
268 : unsigned ncv=ndim();
269 5132 : std::vector<double> support( ncv );
270 5132 : if(dtype==diagonal) {
271 3532 : for(unsigned i=0; i<ncv; ++i) support[i]=getCutoff(width[i]);
272 4412 : } else if(dtype==multi) {
273 4412 : Matrix<double> mymatrix( getMatrix() ), myinv( ncv,ncv );
274 4412 : Invert(mymatrix,myinv);
275 4412 : Matrix<double> myautovec(ncv,ncv); std::vector<double> myautoval(ncv);
276 4412 : diagMat(myinv,myautoval,myautovec);
277 : double maxautoval=0.;
278 : unsigned ind_maxautoval=0;
279 22148 : for (unsigned i=0; i<ncv; i++) {
280 17736 : if(myautoval[i]>maxautoval) {maxautoval=myautoval[i]; ind_maxautoval=i;}
281 : }
282 22148 : for(unsigned i=0; i<ncv; ++i) {
283 17736 : double extent=fabs(sqrt(maxautoval)*myautovec(i,ind_maxautoval));
284 17736 : support[i]=getCutoff( extent );
285 : }
286 : } else {
287 0 : plumed_merror("cannot find support if metric is not multi or diagonal type");
288 : }
289 5132 : return support;
290 : }
291 :
292 3410 : std::vector<unsigned> KernelFunctions::getSupport( const std::vector<double>& dx ) const {
293 3410 : plumed_assert( ndim()==dx.size() );
294 3410 : std::vector<unsigned> support( dx.size() );
295 3410 : std::vector<double> vv=getContinuousSupport( );
296 40835 : for(unsigned i=0; i<dx.size(); ++i) support[i]=static_cast<unsigned>(ceil( vv[i]/dx[i] ));
297 3410 : return support;
298 : }
299 :
300 23604933 : double KernelFunctions::evaluate( const std::vector<Value*>& pos, std::vector<double>& derivatives, bool usederiv, bool doInt, double lowI_, double uppI_) const {
301 : plumed_dbg_assert( pos.size()==ndim() && derivatives.size()==ndim() );
302 : #ifndef NDEBUG
303 : if( usederiv ) plumed_massert( ktype!=uniform, "step function can not be differentiated" );
304 : #endif
305 23604933 : if(doInt) {
306 : plumed_dbg_assert(center.size()==1);
307 0 : if(pos[0]->get()<lowI_) pos[0]->set(lowI_);
308 0 : if(pos[0]->get()>uppI_) pos[0]->set(uppI_);
309 : }
310 : double r2=0;
311 23604933 : if(dtype==diagonal) {
312 89430189 : for(unsigned i=0; i<ndim(); ++i) {
313 226359246 : derivatives[i]=-pos[i]->difference( center[i] ) / width[i];
314 37726541 : r2+=derivatives[i]*derivatives[i];
315 37726541 : derivatives[i] /= width[i];
316 : }
317 9627826 : } else if(dtype==multi) {
318 9627680 : Matrix<double> mymatrix( getMatrix() );
319 65452752 : for(unsigned i=0; i<mymatrix.nrows(); ++i) {
320 55825072 : double dp_i, dp_j; derivatives[i]=0;
321 83737608 : dp_i=-pos[i]->difference( center[i] );
322 191505736 : for(unsigned j=0; j<mymatrix.ncols(); ++j) {
323 81796600 : if(i==j) dp_j=dp_i;
324 215536256 : else dp_j=-pos[j]->difference( center[j] );
325 :
326 163593200 : derivatives[i]+=mymatrix(i,j)*dp_j;
327 163593200 : r2+=dp_i*dp_j*mymatrix(i,j);
328 : }
329 : }
330 146 : } else if(dtype==vonmises) {
331 438 : std::vector<double> costmp( ndim() ), sintmp( ndim() ), sinout( ndim(), 0.0 );
332 690 : for(unsigned i=0; i<ndim(); ++i) {
333 544 : if( pos[i]->isPeriodic() ) {
334 1008 : sintmp[i]=sin( 2.*pi*(pos[i]->get() - center[i])/pos[i]->getMaxMinusMin() );
335 1008 : costmp[i]=cos( 2.*pi*(pos[i]->get() - center[i])/pos[i]->getMaxMinusMin() );
336 : } else {
337 60 : sintmp[i]=pos[i]->get() - center[i];
338 20 : costmp[i]=1.0;
339 : }
340 : }
341 :
342 146 : Matrix<double> mymatrix( getMatrix() );
343 690 : for(unsigned i=0; i<mymatrix.nrows(); ++i) {
344 544 : derivatives[i]=0;
345 272 : if( pos[i]->isPeriodic() ) {
346 504 : r2+=2*( 1 - costmp[i] )*mymatrix(i,i);
347 : } else {
348 40 : r2+=sintmp[i]*sintmp[i]*mymatrix(i,i);
349 : }
350 1320 : for(unsigned j=0; j<mymatrix.ncols(); ++j) {
351 1280 : if( i!=j ) sinout[i]+=mymatrix(i,j)*sintmp[j];
352 : }
353 1360 : derivatives[i] = mymatrix(i,i)*sintmp[i] + sinout[i]*costmp[i];
354 1028 : if( pos[i]->isPeriodic() ) derivatives[i] *= (2*pi/pos[i]->getMaxMinusMin());
355 : }
356 1380 : for(unsigned i=0; i<sinout.size(); ++i) r2+=sintmp[i]*sinout[i];
357 : }
358 : double kderiv, kval;
359 23604933 : if(ktype==gaussian || ktype==truncatedgaussian) {
360 23570097 : kval=height*std::exp(-0.5*r2); kderiv=-kval;
361 : } else {
362 34836 : double r=sqrt(r2);
363 34836 : if(ktype==triangular) {
364 34836 : if( r<1.0 ) {
365 : if(r==0) kderiv=0;
366 9753 : kderiv=-1; kval=height*( 1. - fabs(r) );
367 : } else {
368 : kval=0.; kderiv=0.;
369 : }
370 0 : } else if(ktype==uniform) {
371 : kderiv=0.;
372 0 : if(r<1.0) kval=height;
373 : else kval=0;
374 : } else {
375 0 : plumed_merror("Not a valid kernel type");
376 : }
377 34836 : kderiv*=height / r ;
378 : }
379 154883631 : for(unsigned i=0; i<ndim(); ++i) derivatives[i]*=kderiv;
380 23604933 : if(doInt) {
381 0 : if((pos[0]->get() <= lowI_ || pos[0]->get() >= uppI_) && usederiv ) for(unsigned i=0; i<ndim(); ++i)derivatives[i]=0;
382 : }
383 23604933 : return kval;
384 : }
385 :
386 4521 : std::unique_ptr<KernelFunctions> KernelFunctions::read( IFile* ifile, const bool& cholesky, const std::vector<std::string>& valnames ) {
387 : double h;
388 9042 : if( !ifile->scanField("height",h) ) return NULL;;
389 :
390 9030 : std::string sss; ifile->scanField("multivariate",sss);
391 12453 : std::string ktype="gaussian"; if( ifile->FieldExist("kerneltype") ) ifile->scanField("kerneltype",ktype);
392 8945 : plumed_massert( sss=="false" || sss=="true" || sss=="von-misses", "multivariate flag must be either false, true or von-misses");
393 :
394 : // Read the position of the center
395 9030 : std::vector<double> cc( valnames.size() );
396 36246 : for(unsigned i=0; i<valnames.size(); ++i) ifile->scanField(valnames[i],cc[i]);
397 :
398 : std::vector<double> sig;
399 4515 : if( sss=="false" ) {
400 94 : sig.resize( valnames.size() );
401 752 : for(unsigned i=0; i<valnames.size(); ++i) {
402 376 : ifile->scanField("sigma_"+valnames[i],sig[i]);
403 188 : if( !cholesky ) sig[i]=sqrt(sig[i]);
404 : }
405 188 : return std::unique_ptr<KernelFunctions>(new KernelFunctions( cc, sig, ktype, "DIAGONAL", h ) );
406 : }
407 :
408 4421 : unsigned ncv=valnames.size();
409 4421 : sig.resize( (ncv*(ncv+1))/2 );
410 : Matrix<double> upper(ncv,ncv), lower(ncv,ncv), mymult( ncv, ncv ), invmatrix(ncv,ncv);
411 22189 : for(unsigned i=0; i<ncv; ++i) {
412 35666 : for(unsigned j=0; j<ncv-i; j++) {
413 93737 : ifile->scanField("sigma_" +valnames[j+i] + "_" + valnames[j], lower(j+i,j) );
414 40173 : upper(j,j+i)=lower(j+i,j); mymult(j+i,j)=mymult(j,j+i)=lower(j+i,j);
415 : }
416 : }
417 4421 : if( cholesky ) mult(lower,upper,mymult);
418 4421 : Invert( mymult, invmatrix );
419 : unsigned k=0;
420 22189 : for(unsigned i=0; i<ncv; i++) {
421 49057 : for(unsigned j=i; j<ncv; j++) { sig[k]=invmatrix(i,j); k++; }
422 : }
423 8833 : if( sss=="true" ) return std::unique_ptr<KernelFunctions>(new KernelFunctions( cc, sig, ktype, "MULTIVARIATE", h ) );
424 18 : return std::unique_ptr<KernelFunctions>(new KernelFunctions( cc, sig, ktype, "VON-MISSES", h ) );
425 : }
426 :
427 5517 : }
|