Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 : Copyright (c) 2015-2023 The plumed team 3 : (see the PEOPLE file at the root of the distribution for a list of names) 4 : 5 : See http://www.plumed.org for more information. 6 : 7 : This file is part of plumed, version 2. 8 : 9 : plumed is free software: you can redistribute it and/or modify 10 : it under the terms of the GNU Lesser General Public License as published by 11 : the Free Software Foundation, either version 3 of the License, or 12 : (at your option) any later version. 13 : 14 : plumed is distributed in the hope that it will be useful, 15 : but WITHOUT ANY WARRANTY; without even the implied warranty of 16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 : GNU Lesser General Public License for more details. 18 : 19 : You should have received a copy of the GNU Lesser General Public License 20 : along with plumed. If not, see <http://www.gnu.org/licenses/>. 21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ 22 : #include "matrixtools/MatrixOperationBase.h" 23 : #include "core/ActionRegister.h" 24 : #include "tools/Random.h" 25 : 26 : //+PLUMEDOC LANDMARKS FARTHEST_POINT_SAMPLING 27 : /* 28 : Select a set of landmarks using farthest point sampling. 29 : 30 : This action is used within the [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) shortcut, which performs farthest point sampling. 31 : Farthest point sampling is a method of selecting a subset of input coordinates that works by selecting a first point at random. 32 : The remaining points are then selected by taking the unselected point in the input data set that is the furthest 33 : from all the points that have been selected thus far. 34 : 35 : This particular action is designed to be used in conjunction with [SELECT_WITH_MASK](SELECT_WITH_MASK.md) in the same way that 36 : [CREATE_MASK](CREATE_MASK.md) is designed to be be used with that action. This action takes an $N\times N$ matrix of dissimilarities 37 : in input and outputs and $N$ dimensional vector whose elements are ones and zeros. As shown in the example input below, this output 38 : vector is passed to a [SELECT_WITH_MASK](SELECT_WITH_MASK.md) using the MASK keyword. 39 : 40 : The example below thus shows how this action is used in the [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) shortcut to select 100 landmark 41 : frames from the trajectory using farthest point sampling. 42 : 43 : ```plumed 44 : # This stores the positions of all the first 10 atoms in the system for later analysis 45 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL 46 : 47 : # We now compute the dissimilarities between these frames 48 : cc_dataT: TRANSPOSE ARG=cc_data 49 : dd: DISSIMILARITIES ARG=cc_data,cc_dataT 50 : 51 : # Create a mask that will be used to create our landmark data 52 : mask: FARTHEST_POINT_SAMPLING ARG=dd NZEROS=100 53 : 54 : # These are the landmark points 55 : landmarks: SELECT_WITH_MASK ARG=cc_data ROW_MASK=mask 56 : 57 : # Output the landmarks to a file 58 : DUMPPDB ATOMS=landmarks ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb 59 : ``` 60 : 61 : This only saves the coordinates of the landmark points. If you look at the shortcuts in the documentation for [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) 62 : you can see how the shortcut also saves information on the dissimilarities between the landmarks, the dissimilarities between the landmarks and all the other 63 : points and the weights of the landmarks, which are determined using a [VORONOI](VORONOI.md) analysis. 64 : 65 : */ 66 : //+ENDPLUMEDOC 67 : 68 : namespace PLMD { 69 : namespace landmarks { 70 : 71 : class FarthestPointSampling : public matrixtools::MatrixOperationBase { 72 : private: 73 : unsigned seed; 74 : unsigned nlandmarks; 75 : public: 76 : static void registerKeywords( Keywords& keys ); 77 : explicit FarthestPointSampling( const ActionOptions& ao ); 78 0 : unsigned getNumberOfDerivatives() override { 79 0 : return 0; 80 : } 81 : void prepare() override ; 82 : void calculate() override ; 83 0 : void apply() override {} 84 0 : double getForceOnMatrixElement( const unsigned& jrow, const unsigned& krow ) const override { 85 0 : plumed_merror("this should not be called"); 86 : } 87 : }; 88 : 89 : PLUMED_REGISTER_ACTION(FarthestPointSampling,"FARTHEST_POINT_SAMPLING") 90 : 91 4 : void FarthestPointSampling::registerKeywords( Keywords& keys ) { 92 4 : matrixtools::MatrixOperationBase::registerKeywords( keys ); 93 4 : keys.add("compulsory","NZEROS","the number of landmark points that you want to select"); 94 4 : keys.add("compulsory","SEED","1234","a random number seed"); 95 8 : keys.setValueDescription("vector","a vector which has as many elements as there are rows in the input matrix of dissimilarities. NZEROS of the elements in this vector are equal to one, the rest of the elements are equal to zero. The nodes that have elements equal to one are the NZEROS points that are farthest appart according to the input dissimilarities"); 96 4 : } 97 : 98 1 : FarthestPointSampling::FarthestPointSampling( const ActionOptions& ao ): 99 : Action(ao), 100 1 : MatrixOperationBase(ao) { 101 1 : if( getPntrToArgument(0)->getShape()[0]!=getPntrToArgument(0)->getShape()[1] ) { 102 0 : error("input to this argument should be a square matrix of dissimilarities"); 103 : } 104 1 : parse("NZEROS",nlandmarks); 105 1 : parse("SEED",seed); 106 1 : log.printf(" selecting %d landmark points \n", nlandmarks ); 107 : 108 1 : std::vector<std::size_t> shape(1); 109 1 : shape[0] = getPntrToArgument(0)->getShape()[0]; 110 1 : addValue( shape ); 111 1 : setNotPeriodic(); 112 1 : } 113 : 114 1 : void FarthestPointSampling::prepare() { 115 1 : Value* myval = getPntrToComponent(0); 116 1 : if( myval->getShape()[0]!=getPntrToArgument(0)->getShape()[0] ) { 117 1 : std::vector<std::size_t> shape(1); 118 1 : shape[0] = getPntrToArgument(0)->getShape()[0]; 119 1 : myval->setShape(shape); 120 : } 121 4 : for(unsigned i=0; i<nlandmarks; ++i) { 122 3 : myval->set( i, 0.0 ); 123 : } 124 11 : for(unsigned i=nlandmarks; i<myval->getShape()[0]; ++i) { 125 10 : myval->set( i, 1.0 ); 126 : } 127 1 : } 128 : 129 1 : void FarthestPointSampling::calculate() { 130 1 : Value* myval=getPntrToComponent(0); 131 1 : unsigned npoints = getPntrToArgument(0)->getShape()[0]; 132 14 : for(unsigned i=0; i<npoints; ++i) { 133 13 : myval->set( i, 1.0 ); 134 : } 135 1 : std::vector<unsigned> landmarks( nlandmarks ); 136 : 137 : // Select first point at random 138 1 : Random random; 139 1 : random.setSeed(-seed); 140 1 : double rand=random.RandU01(); 141 1 : landmarks[0] = std::floor( npoints*rand ); 142 1 : myval->set( landmarks[0], 0 ); 143 : 144 : // Now find distance to all other points (N.B. We can use squared distances here for speed) 145 1 : Matrix<double> distances( nlandmarks, npoints ); 146 : Value* myarg = getPntrToArgument(0); 147 14 : for(unsigned i=0; i<npoints; ++i) { 148 13 : distances(0,i) = myarg->get( landmarks[0]*npoints + i ); 149 : } 150 : 151 : // Now find all other landmarks 152 3 : for(unsigned i=1; i<nlandmarks; ++i) { 153 : // Find point that has the largest minimum distance from the landmarks selected thus far 154 : double maxd=0; 155 28 : for(unsigned j=0; j<npoints; ++j) { 156 26 : double mind=distances(0,j); 157 39 : for(unsigned k=1; k<i; ++k) { 158 13 : if( distances(k,j)<mind ) { 159 : mind=distances(k,j); 160 : } 161 : } 162 26 : if( mind>maxd ) { 163 : maxd=mind; 164 5 : landmarks[i]=j; 165 : } 166 : } 167 2 : myval->set( landmarks[i], 0 ); 168 28 : for(unsigned k=0; k<npoints; ++k) { 169 26 : distances(i,k) = myarg->get( landmarks[i]*npoints + k ); 170 : } 171 : } 172 1 : } 173 : 174 : } 175 : }