Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2020 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionWithArguments.h"
24 : #include "core/ActionRegister.h"
25 : #include "tools/ConjugateGradient.h"
26 : #include "tools/SwitchingFunction.h"
27 : #include "gridtools/GridSearch.h"
28 : #include "SMACOF.h"
29 :
30 : namespace PLMD {
31 : namespace dimred {
32 :
33 : //+PLUMEDOC DIMRED ARRANGE_POINTS
34 : /*
35 : Arrange points in a low dimensional space so that the (transformed) distances between points in the low dimensional space match the dissimilarities provided in an input matrix.
36 :
37 : This action and [PROJECT_POINTS](PROJECT_POINTS.md) are the workhorses for the implementation of [SKETCHMAP](SKETCHMAP.md) that is provided within PLUMED.
38 : ARRANGE_POINTS allows you to find a set of low dimenionsional coordinates, $\{x_k\}_\textrm{min}$, for a set of high-dimensional coordinates, $\{X_k\}$, by
39 : minimising this stress function:
40 :
41 : $$
42 : \chi(\{x_k\}) = \sum_{i=2}^N \sum_{j=1}^i w_{ij} [ D(X_i,X_j) - d(x_i,x_j) ]^2
43 : $$
44 :
45 : The $D$ here indicates that we are calculating the dissimilarity between the point $X_i$ and $X_j$, while the $d$ indicates that we are calculating the distance between
46 : the projections of points $i$ and $j$. In minimising the expression above we are thus finding a set of low-dimensional projections for the high dimensional points that
47 : were input. The $w_{ij}$ is a weight that determines how important reproducing the distance between atom $i$ and $j$
48 :
49 : The example input below illustrates how you can use ARRANGE_POINTS to project a high-dimensional representational of a trajectory in a low-dimensional space.
50 :
51 : ```plumed
52 : # Calcuate the instantaneous values of the three distances
53 : d1: DISTANCE ATOMS=1,2
54 : d2: DISTANCE ATOMS=3,4
55 : d3: DISTANCE ATOMS=5,6
56 :
57 : # Collect the calulated distances for later analysis
58 : ff: COLLECT_FRAMES STRIDE=1 ARG=d1,d2,d3
59 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
60 :
61 : # Generate an initial projection of the high dimensional points using MDS
62 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
63 :
64 : # Generate a matrix of w_ij values
65 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
66 :
67 : # And produce the projections
68 : proj: ARRANGE_POINTS ARG=mds-1,mds-2 TARGET1=mds_mat WEIGHTS1=weights
69 :
70 : # And print the projections to a file
71 : DUMPVECTOR ARG=proj.* FILE=colvar
72 : ```
73 :
74 : Here projections are generated once at the end of the trajectory and are output to a file called colvar. Initial projections of all the points are generated using [CLASSICAL_MDS](CLASSICAL_MDS.md).
75 : A better optimisation of the stress function above is then obtained by using the conjugate gradient algorithm. The `MINTYPE` option in ARRANGE_POINTS allows you to specify whether
76 : [conjugate gradient](https://en.wikipedia.org/wiki/Conjugate_gradient_method), the [smacof](https://en.wikipedia.org/wiki/Stress_majorization) algorithm or
77 : the pointwise global optimisation algorithm that was discussed in the original paper on sketch-map that is referenced below are used to optimise the stress function.
78 :
79 : ## Using RMSD distances
80 :
81 : If you wish to use the coordinates of the atoms directly when computing the dissimilarities in the high dimensional space you use an input similar to the one shown below:
82 :
83 : ```plumed
84 : # Collect the positions of the atoms for later analysis
85 : ff: COLLECT_FRAMES STRIDE=1 ATOMS=1-10
86 :
87 : # Generate an initial projection of the high dimensional points using MDS
88 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
89 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
90 :
91 : # Generate a matrix of w_ij values
92 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
93 :
94 : # And produce the projections
95 : proj: ARRANGE_POINTS ARG=mds-1,mds-2 TARGET1=mds_mat WEIGHTS1=weights
96 :
97 : # And print the projections to a file
98 : DUMPVECTOR ARG=proj.* FILE=colvar
99 : ```
100 :
101 : The dissimilarities between the atomic configurations here are computed as [RMSD](RMSD.md) distances between atomic configurations.
102 :
103 : ## Using transformed distances
104 :
105 : In [SKETCHMAP](SKETCHMAP.md) the stress function that is minimised is not the one given above. Instead of seeking to generate a projection, in which the
106 : distances between the projections are the same as the dissimilarities between the high-dimensional coordinates, the dissimilarities and distances are transformed by functions as illustrated below:
107 :
108 : $$
109 : \chi(\{x_k\}) = \sum_{i=2}^N \sum_{j=1}^i w_{ij} [ F[D(X_i,X_j)] - f[d(x_i,x_j)] ]^2
110 : $$
111 :
112 : The two functions $F$ and $f$ in this expression are usually different as you can see in the input below:
113 :
114 : ```plumed
115 : # Calcuate the instantaneous values of the three distances
116 : d1: DISTANCE ATOMS=1,2
117 : d2: DISTANCE ATOMS=3,4
118 : d3: DISTANCE ATOMS=5,6
119 :
120 : # Collect the calulated distances for later analysis
121 : ff: COLLECT_FRAMES STRIDE=1 ARG=d1,d2,d3
122 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
123 :
124 : # Generate an initial projection of the high dimensional points using MDS
125 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
126 :
127 : # Generate a matrix of w_ij values
128 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
129 :
130 : # Transform the dissimilarities with the function F
131 : fed: MORE_THAN ARG=mds_mat SQUARED SWITCH={SMAP R_0=4 A=3 B=2}
132 :
133 : # And produce the projections
134 : proj: ARRANGE_POINTS ARG=mds-1,mds-2 TARGET1=fed WEIGHTS1=weights FUNC1={SMAP R_0=4 A=1 B=2}
135 :
136 : # And print the projections to a file
137 : DUMPVECTOR ARG=proj.* FILE=colvar
138 : ```
139 :
140 : In the input above the function, $F$, that is applied on the dissimilarities is implemented using a [MORE_THAN](MORE_THAN.md) action. The function, $f$,
141 : that is applied on the distances in the low-dimensional space is specified using the `FUNC` keyword that is input to ARRANGE_POINTS.
142 :
143 : ## Using multiple targets
144 :
145 : At its most complex this action allows you to minimise a stress function such as the one below:
146 :
147 : $$
148 : \chi(\{x_k\}) = \sum_{i=2}^N \sum_{j=1}^i \sum_{n=1}^M w_{nij} [ F_n[D(X_i,X_j)] - f_n[d(x_i,x_j)] ]^2
149 : $$
150 :
151 : The input below shows how this can be implemted within PLUMED:
152 :
153 : ```plumed
154 : # Calcuate the instantaneous values of the three distances
155 : d1: DISTANCE ATOMS=1,2
156 : d2: DISTANCE ATOMS=3,4
157 : d3: DISTANCE ATOMS=5,6
158 :
159 : # Collect the calulated distances for later analysis
160 : ff: COLLECT_FRAMES STRIDE=1 ARG=d1,d2,d3
161 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
162 :
163 : # Generate an initial projection of the high dimensional points using MDS
164 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
165 :
166 : # Generate a matrix of w_ij values
167 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
168 : w1: CUSTOM ARG=weights FUNC=0.3*x PERIODIC=NO
169 : w2: CUSTOM ARG=weights FUNC=(1-0.3)*x PERIODIC=NO
170 :
171 : # Transform the dissimilarities with the function F
172 : fed: MORE_THAN ARG=mds_mat SQUARED SWITCH={SMAP R_0=4 A=3 B=2}
173 :
174 : # And produce the projections
175 : proj: ARRANGE_POINTS ...
176 : ARG=mds-1,mds-2
177 : TARGET1=mds_mat WEIGHTS1=w1 FUNC1={CUSTOM FUNC=1-sqrt(x2) R_0=1.0}
178 : TARGET2=fed WEIGHTS2=w2 FUNC2={SMAP R_0=4 A=1 B=2}
179 : ...
180 :
181 : # And print the projections to a file
182 : DUMPVECTOR ARG=proj.* FILE=colvar
183 : ```
184 :
185 : Here the sum over $M$ in the expression above has two terms. In the first of these terms $F_1$ is the identity so the
186 : input for `TARGET1` is the output from [EUCLIDEAN_DISTANCE](EUCLIDEAN_DISTANCE.md). $f_1$ is similarly the identity. To
187 : implement the identity here we use the input to `FUNC1` shown above. The input to this function is the input for one of
188 : the switching functions described in the documentation for [LESS_THAN](LESS_THAN.md). What we compute for the transformed
189 : distance is $1-s(d)$ where $s(d)$ is the switching function that is specified in input. Consequently, applying the
190 : function `1-sqrt(x2)` returns the distance.
191 :
192 : The second term in our sum over $M$ in the input above has the dissimilarities and distances transformed by the functions that
193 : we introduced in the previous section.
194 :
195 : ## Choosing an optimization algorithm
196 :
197 : The inputs above use conjugate gradients to optimize the stress function and to find the low dimensional projection. If you
198 : wish you can change the algorithm used to optimize the sketch-map function. For example, in the input below the
199 : [smacof](https://en.wikipedia.org/wiki/Stress_majorization) algorithm is used in place of conjugate gradients.
200 :
201 : ```plumed
202 : # Calcuate the instantaneous values of the three distances
203 : d1: DISTANCE ATOMS=1,2
204 : d2: DISTANCE ATOMS=3,4
205 : d3: DISTANCE ATOMS=5,6
206 :
207 : # Collect the calulated distances for later analysis
208 : ff: COLLECT_FRAMES STRIDE=1 ARG=d1,d2,d3
209 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
210 :
211 : # Generate an initial projection of the high dimensional points using MDS
212 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
213 :
214 : # Generate a matrix of w_ij values
215 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
216 :
217 : # And produce the projections
218 : proj: ARRANGE_POINTS ...
219 : ARG=mds-1,mds-2 TARGET1=mds_mat WEIGHTS1=weights
220 : MINTYPE=smacof
221 : ...
222 :
223 : # And print the projections to a file
224 : DUMPVECTOR ARG=proj.* FILE=colvar
225 : ```
226 :
227 : Alternatively, the following example uses a combination of conjugate gradients and a pointwise global optimisation to optimize the
228 : stress function
229 :
230 : ```plumed
231 : # Calcuate the instantaneous values of the three distances
232 : d1: DISTANCE ATOMS=1,2
233 : d2: DISTANCE ATOMS=3,4
234 : d3: DISTANCE ATOMS=5,6
235 :
236 : # Collect the calulated distances for later analysis
237 : ff: COLLECT_FRAMES STRIDE=1 ARG=d1,d2,d3
238 : ff_weights: CUSTOM ARG=ff_logweights FUNC=exp(x) PERIODIC=NO
239 :
240 : # Generate an initial projection of the high dimensional points using MDS
241 : mds: CLASSICAL_MDS ARG=ff NLOW_DIM=2
242 :
243 : # Generate a matrix of w_ij values
244 : weights: OUTER_PRODUCT ARG=ff_weights,ff_weights
245 :
246 : # And produce the projections
247 : proj: ARRANGE_POINTS ...
248 : ARG=mds-1,mds-2 TARGET1=mds_mat
249 : WEIGHTS1=weights MINTYPE=pointwise
250 : ...
251 :
252 : # And print the projections to a file
253 : DUMPVECTOR ARG=proj.* FILE=colvar
254 : ```
255 :
256 : This is the algorithm that is to optimize the stress function within [SKETCHMAP](SKETCHMAP.md).
257 :
258 : */
259 : //+ENDPLUMEDOC
260 :
261 : class ArrangePoints :
262 : public ActionWithValue,
263 : public ActionWithArguments {
264 : private:
265 : unsigned dimout, maxiter, ncycles, current_index;
266 : double cgtol, gbuf;
267 : std::vector<std::size_t> npoints, nfgrid;
268 : std::vector<double> mypos;
269 : double smacof_tol, smacof_reg;
270 : int dist_target;
271 : enum {conjgrad,pointwise,smacof} mintype;
272 : std::vector<SwitchingFunction> switchingFunction;
273 : void checkInputMatrix( const std::string& key, const unsigned& nvals, const std::vector<Value*>& mat ) const ;
274 : double recalculateSmacofWeights( const std::vector<double>& p, SMACOF& mysmacof ) const ;
275 : protected:
276 : double calculateStress( const std::vector<double>& p, std::vector<double>& d );
277 : double calculateFullStress( const std::vector<double>& p, std::vector<double>& d );
278 : public:
279 : static void registerKeywords( Keywords& keys );
280 : ArrangePoints( const ActionOptions& );
281 0 : unsigned getNumberOfDerivatives() override {
282 0 : return 0;
283 : }
284 : void prepare() override ;
285 : void calculate() override ;
286 : virtual void optimize( std::vector<double>& pos );
287 1 : void apply() override {}
288 : };
289 :
290 : PLUMED_REGISTER_ACTION(ArrangePoints,"ARRANGE_POINTS")
291 :
292 12 : void ArrangePoints::registerKeywords( Keywords& keys ) {
293 12 : Action::registerKeywords( keys );
294 12 : ActionWithValue::registerKeywords( keys );
295 12 : ActionWithArguments::registerKeywords( keys );
296 12 : keys.remove("NUMERICAL_DERIVATIVES");
297 24 : keys.addInputKeyword("compulsory","ARG","vector","the initial positions for the projections");
298 24 : keys.addInputKeyword("numbered","TARGET","matrix","the matrix of target quantities that you would like to match");
299 12 : keys.add("numbered","FUNC","a function that is applied on the distances between the points in the low dimensional space");
300 24 : keys.addInputKeyword("numbered","WEIGHTS","matrix","the matrix with the weights of the target quantities");
301 12 : keys.add("compulsory","MINTYPE","conjgrad","the method to use for the minimisation");
302 12 : keys.add("compulsory","MAXITER","1000","maximum number of optimization cycles for optimisation algorithms");
303 12 : keys.add("compulsory","CGTOL","1E-6","the tolerance for the conjugate gradient minimization");
304 12 : keys.add("compulsory","NCYCLES","5","the number of cycles of global optimization to attempt");
305 12 : keys.add("compulsory","BUFFER","1.1","grid extent for search is (max projection - minimum projection) multiplied by this value");
306 12 : keys.add("compulsory","CGRID_SIZE","10","number of points to use in each grid direction");
307 12 : keys.add("compulsory","FGRID_SIZE","0","interpolate the grid onto this number of points -- only works in 2D");
308 12 : keys.add("compulsory","SMACTOL","1E-4","the tolerance for the smacof algorithm");
309 12 : keys.add("compulsory","SMACREG","0.001","this is used to ensure that we don't divide by zero when updating weights for SMACOF algorithm");
310 24 : keys.addOutputComponent("coord","default","vector","the coordinates of the points in the low dimensional space");
311 12 : keys.addDOI("10.1073/pnas.1108486108");
312 12 : }
313 :
314 :
315 5 : ArrangePoints::ArrangePoints( const ActionOptions& ao ) :
316 : Action(ao),
317 : ActionWithValue(ao),
318 : ActionWithArguments(ao),
319 5 : current_index(0),
320 5 : dist_target(-1) {
321 5 : dimout = getNumberOfArguments();
322 5 : std::vector<std::size_t> shape(1);
323 5 : shape[0]=getPntrToArgument(0)->getNumberOfValues();
324 15 : for(unsigned i=0; i<getNumberOfArguments(); ++i) {
325 10 : if( shape[0]!=getPntrToArgument(i)->getNumberOfValues() ) {
326 0 : error("mismatch between sizes of input coordinates");
327 : }
328 : std::string num;
329 10 : Tools::convert( i+1, num );
330 10 : addComponent( "coord-" + num, shape );
331 20 : componentIsNotPeriodic( "coord-" + num );
332 : }
333 5 : std::vector<Value*> args( getArguments() ), target, weights;
334 : std::string sfd, errors;
335 : // Read in target "distances" and target weights
336 5 : for(unsigned i=1;; ++i) {
337 11 : target.resize(0);
338 22 : if( !parseArgumentList("TARGET",i,target) ) {
339 : break;
340 : }
341 : std::string inum;
342 6 : Tools::convert( i, inum );
343 6 : checkInputMatrix( "TARGET" + inum, shape[0], target );
344 12 : if( !parseArgumentList("WEIGHTS",i,weights) ) {
345 0 : error("missing WEIGHTS" + inum + " keyword in input");
346 : }
347 12 : checkInputMatrix( "WEIGHTS" + inum, shape[0], weights );
348 6 : args.push_back( target[0] );
349 6 : args.push_back( weights[0] );
350 6 : bool has_sf = parseNumbered("FUNC",i,sfd);
351 12 : switchingFunction.push_back( SwitchingFunction() );
352 6 : if( !has_sf ) {
353 1 : switchingFunction[i-1].set( "CUSTOM FUNC=1-sqrt(x2) R_0=1.0", errors );
354 1 : dist_target=i-1;
355 : } else {
356 5 : switchingFunction[i-1].set( sfd, errors );
357 5 : if( errors.length()!=0 ) {
358 0 : error("problem reading switching function description " + errors);
359 : }
360 : }
361 6 : log.printf(" %sth term seeks to match tranformed distances with those in matrix %s \n", inum.c_str(), target[0]->getName().c_str() );
362 6 : log.printf(" in %sth term distances are transformed by 1-switching function with r_0=%s \n", inum.c_str(), switchingFunction[i-1].description().c_str() );
363 6 : log.printf(" in %sth term weights of matrix elements in stress function are given by %s \n", inum.c_str(), weights[0]->getName().c_str() );
364 6 : }
365 : std::string mtype;
366 10 : parse("MINTYPE",mtype);
367 5 : if( mtype=="conjgrad" ) {
368 3 : mintype=conjgrad;
369 3 : log.printf(" minimimising stress function using conjugate gradients\n");
370 2 : } else if( mtype=="pointwise") {
371 1 : mintype=pointwise;
372 1 : log.printf(" minimimising stress function using pointwise global optimisation\n");
373 1 : npoints.resize(dimout);
374 1 : nfgrid.resize(dimout);
375 1 : parseVector("CGRID_SIZE",npoints);
376 1 : parse("BUFFER",gbuf);
377 1 : parse("NCYCLES",ncycles);
378 2 : parseVector("FGRID_SIZE",nfgrid);
379 1 : if( nfgrid[0]!=0 && dimout!=2 ) {
380 0 : error("interpolation only works in two dimensions");
381 : }
382 1 : log.printf(" doing %u cycles of global optimization sweeps\n",ncycles);
383 1 : log.printf(" using coarse grid of points that is %u",npoints[0]);
384 2 : for(unsigned j=1; j<npoints.size(); ++j) {
385 1 : log.printf(" by %u",npoints[j]);
386 : }
387 1 : log.printf("\n grid is %f times larger than the difference between the position of the minimum and maximum projection \n",gbuf);
388 1 : if( nfgrid[0]>0 ) {
389 1 : log.printf(" interpolating stress onto grid of points that is %u",nfgrid[0]);
390 2 : for(unsigned j=1; j<nfgrid.size(); ++j) {
391 1 : log.printf(" by %u",nfgrid[j]);
392 : }
393 1 : log.printf("\n");
394 : }
395 1 : } else if( mtype=="smacof" ) {
396 1 : mintype=smacof;
397 1 : if( dist_target<0 ) {
398 0 : error("one of targets must be distances in order to use smacof");
399 : }
400 1 : log.printf(" minimising stress fucntion using smacof\n");
401 1 : parse("SMACTOL",smacof_tol);
402 1 : parse("SMACREG",smacof_reg);
403 1 : log.printf(" tolerance for smacof algorithms equals %f \n", smacof_tol);
404 1 : log.printf(" using %f as regularisation parameter for weights in smacof algorithm\n", smacof_reg);
405 : } else {
406 0 : error("invalid MINTYPE");
407 : }
408 5 : if( mintype!=smacof) {
409 4 : parse("CGTOL",cgtol);
410 4 : log.printf(" tolerance for conjugate gradient algorithm equals %f \n",cgtol);
411 : }
412 5 : parse("MAXITER",maxiter);
413 5 : log.printf(" maximum number of iterations for minimimization algorithms equals %d \n", maxiter );
414 5 : requestArguments( args );
415 5 : checkRead();
416 5 : }
417 :
418 12 : void ArrangePoints::checkInputMatrix( const std::string& key, const unsigned& nvals, const std::vector<Value*>& mat ) const {
419 12 : if( mat.size()!=1 ) {
420 0 : error("should only be one value in input to " + key );
421 : }
422 12 : if( mat[0]->getRank()!=2 || mat[0]->hasDerivatives() ) {
423 0 : error("input to " + key + " keyword should be a matrix");
424 : }
425 12 : if( mat[0]->getShape()[0]!=nvals || mat[0]->getShape()[1]!=nvals ) {
426 0 : error("input to " + key + " keyword has the wrong size");
427 : }
428 12 : }
429 :
430 24600 : double ArrangePoints::calculateStress( const std::vector<double>& p, std::vector<double>& d ) {
431 : double stress=0;
432 73800 : for(unsigned i=0; i<p.size(); ++i) {
433 49200 : d[i]=0.0;
434 : }
435 24600 : std::vector<double> dtmp(dimout);
436 24600 : std::vector<std::size_t> shape( getPntrToArgument( dimout )->getShape() );
437 24600 : unsigned targi=shape[0]*current_index;
438 24600 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
439 2484600 : for(unsigned i=0; i<shape[0]; ++i) {
440 2460000 : if( i==current_index ) {
441 24600 : continue ;
442 : }
443 : // Calculate distance in low dimensional space
444 : double dd2=0;
445 7306200 : for(unsigned k=0; k<dimout; ++k) {
446 4870800 : dtmp[k]=p[k] - mypos[dimout*i+k];
447 4870800 : dd2+=dtmp[k]*dtmp[k];
448 : }
449 :
450 4870800 : for(unsigned k=0; k<nmatrices; ++k ) {
451 : // Now do transformations and calculate differences
452 2435400 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
453 : // Get the weight for this connection
454 : double weight = 0;
455 245975400 : for(unsigned j=0; j<shape[0]; ++j) {
456 243540000 : weight += getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
457 : }
458 : // Get the difference for the connection
459 2435400 : double fdiff = fd - getPntrToArgument( dimout + 2*k )->get( targi+i );
460 : // Calculate derivatives
461 2435400 : double pref = -2.*weight*fdiff*df;
462 7306200 : for(unsigned n=0; n<dimout; ++n) {
463 4870800 : d[n] += pref*dtmp[n];
464 : }
465 : // Accumulate the total stress
466 2435400 : stress += weight*fdiff*fdiff;
467 : }
468 : }
469 24600 : return stress;
470 : }
471 :
472 1893 : double ArrangePoints::calculateFullStress( const std::vector<double>& p, std::vector<double>& d ) {
473 : // Zero derivative and stress accumulators
474 383993 : for(unsigned i=0; i<p.size(); ++i) {
475 382100 : d[i]=0.0;
476 : }
477 : double stress=0;
478 1893 : std::vector<double> dtmp( dimout );
479 :
480 1893 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
481 1893 : std::vector<std::size_t> shape( getPntrToArgument( dimout )->getShape() );
482 191050 : for(unsigned i=1; i<shape[0]; ++i) {
483 14084882 : for(unsigned j=0; j<i; ++j) {
484 : // Calculate distance in low dimensional space
485 : double dd2=0;
486 41687175 : for(unsigned k=0; k<dimout; ++k) {
487 27791450 : dtmp[k]=p[dimout*i+k] - p[dimout*j+k];
488 27791450 : dd2+=dtmp[k]*dtmp[k];
489 : }
490 :
491 27791450 : for(unsigned k=0; k<nmatrices; ++k ) {
492 : // Now do transformations and calculate differences
493 13895725 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
494 : // Get the weight for this connection
495 13895725 : double weight = getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
496 : // Get the difference for the connection
497 13895725 : double fdiff = fd - getPntrToArgument( dimout + 2*k )->get( shape[0]*i+j );
498 : // Calculate derivatives
499 13895725 : double pref = -2.*weight*fdiff*df;
500 41687175 : for(unsigned n=0; n<dimout; ++n) {
501 27791450 : double dterm=pref*dtmp[n];
502 27791450 : d[dimout*i+n]+=dterm;
503 27791450 : d[dimout*j+n]-=dterm;
504 : }
505 : // Accumulate the total stress
506 13895725 : stress += weight*fdiff*fdiff;
507 : }
508 : }
509 : }
510 1893 : return stress;
511 : }
512 :
513 2 : double ArrangePoints::recalculateSmacofWeights( const std::vector<double>& p, SMACOF& mysmacof ) const {
514 : double stress=0, totalWeight=0;
515 2 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
516 2 : std::vector<std::size_t> shape( getPntrToArgument( dimout )->getShape() );
517 1000 : for(unsigned i=1; i<shape[0]; ++i) {
518 250498 : for(unsigned j=0; j<i; ++j) {
519 : // Calculate distance in low dimensional space
520 : double dd2=0;
521 748500 : for(unsigned k=0; k<dimout; ++k) {
522 499000 : double dtmp=p[dimout*i+k] - p[dimout*j+k];
523 499000 : dd2+=dtmp*dtmp;
524 : }
525 : // Calculate difference between target difference and true difference
526 249500 : double wval=0, dd1 = sqrt(dd2);
527 249500 : double diff = mysmacof.getDistance(i,j) - dd1;
528 :
529 748500 : for(unsigned k=0; k<nmatrices; ++k ) {
530 : // Don't need to do anything for distances we are matching
531 499000 : if( k==static_cast<unsigned>(dist_target) ) {
532 249500 : continue;
533 : }
534 : // Now do transformations and calculate differences
535 249500 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
536 : // Get the weight for this connection
537 249500 : double weight = getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
538 : // Get the difference for the connection
539 249500 : double fdiff = getPntrToArgument( dimout + 2*k )->get( shape[0]*i+j ) - fd;
540 : // Now set the weight if difference in distance is larger than regularisation parameter
541 249500 : if( fabs(diff)>smacof_reg ) {
542 249403 : wval -= weight*fdiff*df*dd1 / diff;
543 : }
544 : // And the total stress and weights
545 249500 : stress += weight*fdiff*fdiff;
546 249500 : totalWeight += weight;
547 : }
548 249500 : mysmacof.setWeight( j, i, wval );
549 249500 : mysmacof.setWeight( i, j, wval );
550 : }
551 : }
552 2 : return stress / totalWeight;
553 : }
554 :
555 6 : void ArrangePoints::optimize( std::vector<double>& pos ) {
556 : ConjugateGradient<ArrangePoints> mycgminimise( this );
557 6 : if( mintype==conjgrad ) {
558 4 : mycgminimise.minimise( cgtol, pos, &ArrangePoints::calculateFullStress );
559 2 : } else if( mintype==pointwise ) {
560 1 : unsigned nvals=getPntrToArgument( dimout )->getShape()[0];
561 1 : std::vector<double> gmin( dimout ), gmax( dimout ), mypoint( dimout );
562 : // Find the extent of the grid
563 3 : for(unsigned j=0; j<dimout; ++j) {
564 2 : gmin[j]=gmax[j]=pos[j];
565 : }
566 100 : for(unsigned i=1; i<nvals; ++i) {
567 297 : for(unsigned j=0; j<dimout; ++j) {
568 198 : if( pos[dimout*i+j] < gmin[j] ) {
569 7 : gmin[j] = pos[dimout*i+j];
570 : }
571 198 : if( pos[dimout*i+j] > gmax[j] ) {
572 8 : gmax[j] = pos[dimout*i+j];
573 : }
574 : }
575 : }
576 3 : for(unsigned j=0; j<dimout; ++j) {
577 2 : double gbuffer = 0.5*gbuf*( gmax[j]-gmin[j] ) - 0.5*( gmax[j]- gmin[j] );
578 2 : gmin[j]-=gbuffer;
579 2 : gmax[j]+=gbuffer;
580 : }
581 1 : mypos.resize( pos.size() );
582 201 : for(unsigned i=0; i<mypos.size(); ++i) {
583 200 : mypos[i] = pos[i];
584 : }
585 1 : gridtools::GridSearch<ArrangePoints> mygridsearch( gmin, gmax, npoints, nfgrid, this );
586 : // Run multiple loops over all projections
587 3 : for(unsigned i=0; i<ncycles; ++i) {
588 202 : for(unsigned j=0; j<nvals; ++j) {
589 : // Setup target distances and target functions for calculate stress
590 200 : current_index=j;
591 :
592 : // Find current projection of jth point
593 600 : for(unsigned k=0; k<dimout; ++k) {
594 400 : mypoint[k]=mypos[j*dimout+k];
595 : }
596 : // Minimise using grid search
597 200 : bool moved=mygridsearch.minimise( mypoint, &ArrangePoints::calculateStress );
598 200 : if( moved ) {
599 : // Reassign the new projection
600 66 : for(unsigned k=0; k<dimout; ++k) {
601 44 : mypos[dimout*j+k]=mypoint[k];
602 : }
603 : // Minimise output using conjugate gradient
604 22 : mycgminimise.minimise( cgtol, mypos, &ArrangePoints::calculateFullStress );
605 : }
606 : }
607 402 : for(unsigned ii=0; ii<mypos.size(); ++ii) {
608 400 : pos[ii] = mypos[ii];
609 : }
610 : }
611 2 : } else if( mintype==smacof ) {
612 1 : SMACOF mysmacof( getPntrToArgument( dimout + 2*dist_target) );
613 1 : double stress = recalculateSmacofWeights( pos, mysmacof );
614 :
615 1 : for(unsigned i=0; i<maxiter; ++i) {
616 : // Optimise using smacof and current weights
617 1 : mysmacof.optimize( smacof_tol, maxiter, pos );
618 : // Recalculate weights matrix and sigma
619 1 : double newsig = recalculateSmacofWeights( pos, mysmacof );
620 : // Test whether or not the algorithm has converged
621 1 : if( fabs( newsig - stress )<smacof_tol ) {
622 : break;
623 : }
624 : // Make initial sigma into new sigma so that the value of new sigma is used every time so that the error can be reduced
625 : stress=newsig;
626 : }
627 : }
628 6 : }
629 :
630 6 : void ArrangePoints::prepare() {
631 : // Make sure all the components are the right size
632 6 : std::vector<std::size_t> shape(1,getPntrToArgument( dimout )->getShape()[0]);
633 18 : for(unsigned j=0; j<dimout; ++j) {
634 12 : if( getPntrToComponent(j)->getShape()[0]!=shape[0] ) {
635 8 : getPntrToComponent(j)->setShape( shape );
636 : }
637 : }
638 6 : }
639 :
640 6 : void ArrangePoints::calculate() {
641 : // Retrive the initial value
642 6 : unsigned nvals = getPntrToArgument( dimout )->getShape()[0];
643 6 : std::vector<double> pos( dimout*nvals );
644 1056 : for(unsigned i=0; i<nvals; ++i) {
645 3150 : for(unsigned j=0; j<dimout; ++j) {
646 2100 : pos[ dimout*i + j ] = getPntrToArgument(j)->get(i);
647 : }
648 : }
649 : // Do the optimization
650 6 : optimize( pos );
651 : // And set the final values
652 1056 : for(unsigned i=0; i<nvals; ++i) {
653 3150 : for(unsigned j=0; j<dimout; ++j) {
654 2100 : getPntrToComponent(j)->set( i, pos[dimout*i+j] );
655 : }
656 : }
657 6 : }
658 :
659 : }
660 : }
|