Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "colvar/Colvar.h"
24 : #include "core/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "tools/Communicator.h"
27 : #include "tools/Matrix.h"
28 : #include "core/GenericMolInfo.h"
29 : #include "core/ActionSet.h"
30 : #include "tools/File.h"
31 : #include "tools/OpenMP.h"
32 : #include <string>
33 : #include <cmath>
34 : #include <map>
35 : #include <ctime>
36 : #include "tools/Random.h"
37 :
38 : #ifndef M_PI
39 : //why do not use PLMD::pi?
40 : #define M_PI 3.14159265358979323846
41 : #endif
42 :
43 : namespace PLMD {
44 : namespace isdb {
45 :
46 : //+PLUMEDOC ISDB_COLVAR BAIES
47 : /*
48 : Bayesian refinement of AF models.
49 :
50 : This action implements the Bayesian approach to refine AF models introduced here and here.
51 : It can be used to generate conformational ensembles of IDPs or refine AF models prior to small-molecule virtual screening.
52 :
53 : ## Examples
54 :
55 : Complete tutorials can be found <a href="https://github.com/COSBlab/bAIes-IDP">here</a> and here.
56 :
57 : */
58 : //+ENDPLUMEDOC
59 :
60 : class BAIES : public Colvar {
61 :
62 : private:
63 :
64 : // bool pbc
65 : bool pbc_;
66 : // temperature in kbt
67 : double kbt_;
68 : // number of threads
69 : unsigned nt_;
70 : // positions
71 : std::vector<Vector> pos_;
72 : // derivatives
73 : std::vector<Vector> atom_der_;
74 : // AF2 parameters variables
75 : std::string mtype_;
76 : std::vector < std::pair<unsigned, unsigned> > atom_pairs_;
77 : std::vector<double> mus_;
78 : std::vector<double> sigmas_;
79 : // constants
80 : double inv_sqrt2_, sqrt2_pi_, inv_pi2_;
81 : // prior type
82 : unsigned prior_;
83 : enum { NONE, JEFFREYS, CAUCHY };
84 : // private functions
85 : void read_data_file(std::string datafile, std::vector<AtomNumber> atoms, double smin);
86 : // energie
87 : double getGauss();
88 : double getGaussJeffreys();
89 : double getGaussCauchy();
90 : double getLognorm();
91 : double getLognormJeffreys();
92 : double getLognormCauchy();
93 :
94 : public:
95 : static void registerKeywords( Keywords& keys );
96 : explicit BAIES(const ActionOptions&);
97 : // active methods:
98 : void calculate() override;
99 : };
100 :
101 : PLUMED_REGISTER_ACTION(BAIES,"BAIES")
102 :
103 2 : void BAIES::registerKeywords( Keywords& keys ) {
104 2 : Colvar::registerKeywords( keys );
105 2 : keys.add("atoms","ATOMS","atoms used in the calculation of bAIes energy");
106 2 : keys.add("compulsory","DATA_FILE","file with AF2 fit parameters");
107 2 : keys.add("compulsory","PRIOR", "type of prior to use (NONE, JEFFREYS, CAUCHY");
108 2 : keys.add("optional","TEMP", "temperature in kBt units");
109 2 : keys.add("optional","SIGMA_MIN", "minimum value of sigma");
110 4 : keys.addOutputComponent("ene","default","scalar","Bayesian bAIes energy");
111 2 : }
112 :
113 0 : BAIES::BAIES(const ActionOptions&ao):
114 : PLUMED_COLVAR_INIT(ao),
115 0 : pbc_(true) {
116 : // set constants
117 0 : inv_sqrt2_ = 1.0/sqrt(2.0);
118 0 : sqrt2_pi_ = sqrt(2.0 / M_PI);
119 0 : inv_pi2_ = 0.5 / M_PI / M_PI;
120 :
121 : // list of atoms
122 : std::vector<AtomNumber> atoms;
123 0 : parseAtomList("ATOMS", atoms);
124 :
125 : // file with AF2 fit parameters
126 : std::string datafile;
127 0 : parse("DATA_FILE", datafile);
128 :
129 : // prior type
130 : std::string prior;
131 0 : parse("PRIOR", prior);
132 : // check priors allowed
133 0 : if(prior=="NONE") {
134 0 : prior_ = NONE;
135 0 : } else if(prior=="JEFFREYS") {
136 0 : prior_ = JEFFREYS;
137 0 : } else if(prior=="CAUCHY") {
138 0 : prior_ = CAUCHY;
139 : } else {
140 0 : error("Unknown PRIOR type!");
141 : }
142 :
143 0 : bool nopbc=!pbc_;
144 0 : parseFlag("NOPBC",nopbc);
145 0 : pbc_=!nopbc;
146 :
147 : // set kbt
148 : //kbt_ = plumed.getAtoms().getKbT();
149 0 : kbt_ = getkBT();
150 0 : parse("TEMP", kbt_);
151 :
152 : // set sigma_min
153 0 : double sigma_min = -1.0;
154 0 : parse("SIGMA_MIN",sigma_min);
155 :
156 : // check read
157 0 : checkRead();
158 :
159 : // set parallel stuff
160 0 : unsigned mpisize=comm.Get_size();
161 0 : if(mpisize>1) {
162 0 : error("BAIES supports only OpenMP parallelization");
163 : }
164 : // set number of OpenMP threads
165 0 : nt_ = OpenMP::getNumThreads();
166 :
167 : // read the data file
168 0 : read_data_file(datafile, atoms, sigma_min);
169 :
170 : // print stuff to log
171 0 : log.printf(" number of atoms involved : %u\n", atoms.size());
172 0 : log.printf(" AF2 fit parameters file : %s\n", datafile.c_str());
173 0 : log.printf(" prior type : %s\n", prior.c_str());
174 : // print stuff from data file
175 0 : log.printf(" noise model type : %s\n", mtype_.c_str());
176 0 : log.printf(" number of pairs involved : %u\n", atom_pairs_.size());
177 : // other info
178 0 : log.printf(" temperature of the system in energy unit : %f\n", kbt_);
179 0 : log.printf(" minimum value of sigma : %f\n", sigma_min);
180 0 : if(pbc_) {
181 0 : log.printf(" using periodic boundary conditions\n");
182 : } else {
183 0 : log.printf(" without periodic boundary conditions\n");
184 : }
185 :
186 : // prepare other vectors: data and derivatives
187 0 : atom_der_.resize(atoms.size());
188 :
189 : // add components
190 0 : addComponentWithDerivatives("ene");
191 0 : componentIsNotPeriodic("ene");
192 :
193 : // request atoms
194 0 : requestAtoms(atoms);
195 :
196 : // print bibliography
197 0 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
198 0 : log<<plumed.cite("Schnapka, Morozova, Sen, Bonomi, bioRxiv (2025) doi: XXX");
199 0 : log<<"\n";
200 0 : }
201 :
202 0 : void BAIES::read_data_file(const std::string datafile, const std::vector<AtomNumber> atoms, double smin) {
203 : unsigned id, ai, aj;
204 : double mu, sigma;
205 : // map serials to index in position array
206 : std::map< unsigned, unsigned > index;
207 0 : for(unsigned i=0; i<atoms.size(); ++i) {
208 0 : index[atoms[i].serial()]=i;
209 : }
210 : // open file
211 0 : IFile *ifile = new IFile();
212 0 : if(ifile->FileExist(datafile)) {
213 0 : ifile->open(datafile);
214 : // read constant fields
215 0 : if( ifile->FieldExist("model") ) {
216 0 : ifile->scanField("model",mtype_);
217 0 : if( mtype_!="gaussian" && mtype_!="lognormal" ) {
218 0 : error("Unknown noise model type");
219 : }
220 : } else {
221 0 : error("Missing noise model type in DATA_FILE");
222 : }
223 : // read line-by-line
224 0 : while(ifile->scanField("Id",id)) {
225 0 : ifile->scanField("atom_i",ai);
226 0 : ifile->scanField("atom_j",aj);
227 0 : ifile->scanField("mu",mu);
228 0 : ifile->scanField("sigma",sigma);
229 : // list of pairs of atoms
230 0 : atom_pairs_.push_back(std::make_pair(index[ai],index[aj]));
231 : // list of mus
232 0 : mus_.push_back(mu);
233 : // list of sigmas
234 0 : sigmas_.push_back(std::max(smin,sigma));
235 : // new line
236 0 : ifile->scanField();
237 : }
238 0 : ifile->close();
239 : } else {
240 0 : error("Cannot find DATA_FILE "+datafile+"\n");
241 : }
242 0 : delete ifile;
243 0 : }
244 :
245 : // Energy models
246 0 : double BAIES::getGauss() {
247 : double ene = 0.0;
248 : // loop over atoms pairs
249 0 : #pragma omp parallel num_threads(nt_) shared(ene)
250 : {
251 : std::vector<Vector> omp_der(atom_der_.size());
252 : #pragma omp for reduction( + : ene) nowait
253 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
254 : // get indexes
255 : unsigned ii = atom_pairs_[i].first;
256 : unsigned jj = atom_pairs_[i].second;
257 : // get distance
258 : Vector distance=delta(pos_[ii], pos_[jj]);
259 : const double value=distance.modulo();
260 : const double invvalue=1.0/value;
261 : // get energy
262 : double arg = ( value - mus_[i] ) / sigmas_[i];
263 : ene += arg * arg;
264 : // calculate derivatives
265 : double arg_der = kbt_ * arg / sigmas_[i];
266 : omp_der[ii] += - arg_der * invvalue * distance;
267 : omp_der[jj] += arg_der * invvalue * distance;
268 : }
269 : // gather from tasks
270 : #pragma omp critical
271 : for(unsigned i=0; i<atom_der_.size(); ++i) {
272 : atom_der_[i]+=omp_der[i];
273 : }
274 : }
275 : // missing Gaussian normalization constant
276 0 : return 0.5 * kbt_ * ene;
277 : }
278 :
279 0 : double BAIES::getGaussJeffreys() {
280 : double ene = 0.0;
281 : // loop over all atom pairs
282 0 : #pragma omp parallel num_threads(nt_) shared(ene)
283 : {
284 : std::vector<Vector> omp_der(atom_der_.size());
285 : #pragma omp for reduction( + : ene) nowait
286 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
287 : // get indexes
288 : unsigned ii = atom_pairs_[i].first;
289 : unsigned jj = atom_pairs_[i].second;
290 : // get distance
291 : Vector distance=delta(pos_[ii], pos_[jj]);
292 : const double value=distance.modulo();
293 : const double invvalue=1.0/value;
294 : // get energy
295 : double val_mu = value - mus_[i];
296 : double inv_val_mu = 1.0 / val_mu;
297 : double inv_sigma = 1.0 / sigmas_[i];
298 : double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
299 : double erfval = std::erf(erfcontent);
300 : // increase energy
301 : ene += -std::log( erfval * inv_val_mu );
302 : // calculate derivatives
303 : double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
304 : // increase derivatives
305 : omp_der[ii] += - arg_der * invvalue * distance;
306 : omp_der[jj] += arg_der * invvalue * distance;
307 : }
308 : // gather from tasks
309 : #pragma omp critical
310 : for(unsigned i=0; i<atom_der_.size(); ++i) {
311 : atom_der_[i]+=omp_der[i];
312 : }
313 : }
314 : // missing normalization constants
315 0 : return kbt_ * ene;
316 : }
317 :
318 0 : double BAIES::getGaussCauchy() {
319 : double ene = 0.0;
320 : // loop over all atom pairs
321 0 : #pragma omp parallel num_threads(nt_) shared(ene)
322 : {
323 : std::vector<Vector> omp_der(atom_der_.size());
324 : #pragma omp for reduction( + : ene) nowait
325 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
326 : // get indexes
327 : unsigned ii = atom_pairs_[i].first;
328 : unsigned jj = atom_pairs_[i].second;
329 : // get distance
330 : Vector distance=delta(pos_[ii], pos_[jj]);
331 : const double value=distance.modulo();
332 : const double invvalue=1.0/value;
333 : // get energy
334 : double val_mu = value - mus_[i];
335 : double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
336 : ene += std::log( lncontent );
337 : // calculate derivatives
338 : double arg_der = kbt_ * val_mu / lncontent;
339 : omp_der[ii] += - arg_der * invvalue * distance;
340 : omp_der[jj] += arg_der * invvalue * distance;
341 : }
342 : // gather from tasks
343 : #pragma omp critical
344 : for(unsigned i=0; i<atom_der_.size(); ++i) {
345 : atom_der_[i]+=omp_der[i];
346 : }
347 : }
348 0 : return kbt_ * ene;
349 : }
350 :
351 0 : double BAIES::getLognorm() {
352 : double ene = 0.0;
353 : // loop over atoms pairs
354 0 : #pragma omp parallel num_threads(nt_) shared(ene)
355 : {
356 : std::vector<Vector> omp_der(atom_der_.size());
357 : #pragma omp for reduction( + : ene) nowait
358 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
359 : // get indexes
360 : unsigned ii = atom_pairs_[i].first;
361 : unsigned jj = atom_pairs_[i].second;
362 : // get distance
363 : Vector distance=delta(pos_[ii], pos_[jj]);
364 : const double value=distance.modulo();
365 : const double invvalue=1.0/value;
366 : // get energy
367 : double arg = ( std::log(value) - mus_[i] ) / sigmas_[i];
368 : ene += arg * arg;
369 : // calculate derivatives
370 : double arg_der = kbt_ * arg / (value * sigmas_[i]);
371 : omp_der[ii] += - arg_der * invvalue * distance;
372 : omp_der[jj] += arg_der * invvalue * distance;
373 : }
374 : // gather from tasks
375 : #pragma omp critical
376 : for(unsigned i=0; i<atom_der_.size(); ++i) {
377 : atom_der_[i]+=omp_der[i];
378 : }
379 : }
380 0 : return 0.5 * kbt_ * ene;
381 : }
382 :
383 0 : double BAIES::getLognormJeffreys() {
384 : double ene = 0.0;
385 : // loop over all atom pairs
386 0 : #pragma omp parallel num_threads(nt_) shared(ene)
387 : {
388 : std::vector<Vector> omp_der(atom_der_.size());
389 : #pragma omp for reduction( + : ene) nowait
390 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
391 : // get indexes
392 : unsigned ii = atom_pairs_[i].first;
393 : unsigned jj = atom_pairs_[i].second;
394 : // get distance
395 : Vector distance=delta(pos_[ii], pos_[jj]);
396 : const double value=distance.modulo();
397 : const double invvalue=1.0/value;
398 : // get energy
399 : double val_mu = std::log(value) - mus_[i];
400 : double inv_val_mu = 1 / val_mu;
401 : double inv_sigma = 1 / sigmas_[i];
402 : double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
403 : double erfval = std::erf(erfcontent);
404 : // increase energy
405 : ene += -std::log( erfval * inv_val_mu );
406 : // calculate derivatives
407 : double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
408 : // chain rule
409 : arg_der *= invvalue;
410 : // increase derivatives
411 : omp_der[ii] += - arg_der * invvalue * distance;
412 : omp_der[jj] += arg_der * invvalue * distance;
413 : }
414 : // gather from tasks
415 : #pragma omp critical
416 : for(unsigned i=0; i<atom_der_.size(); ++i) {
417 : atom_der_[i]+=omp_der[i];
418 : }
419 : }
420 : // missing constants
421 0 : return kbt_ * ene;
422 : }
423 :
424 0 : double BAIES::getLognormCauchy() {
425 : double ene = 0.0;
426 : // loop over all atom pairs
427 0 : #pragma omp parallel num_threads(nt_) shared(ene)
428 : {
429 : std::vector<Vector> omp_der(atom_der_.size());
430 : #pragma omp for reduction( + : ene) nowait
431 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
432 : // get indexes
433 : unsigned ii = atom_pairs_[i].first;
434 : unsigned jj = atom_pairs_[i].second;
435 : // get distance
436 : Vector distance=delta(pos_[ii], pos_[jj]);
437 : const double value=distance.modulo();
438 : const double invvalue=1.0/value;
439 : // get energy
440 : double val_mu = std::log(value) - mus_[i];
441 : double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
442 : ene += std::log( lncontent );
443 : // calculate derivatives
444 : double arg_der = kbt_ * val_mu * invvalue / lncontent;
445 : omp_der[ii] += - arg_der * invvalue * distance;
446 : omp_der[jj] += arg_der * invvalue * distance;
447 : }
448 : // gather from tasks
449 : #pragma omp critical
450 : for(unsigned i=0; i<atom_der_.size(); ++i) {
451 : atom_der_[i]+=omp_der[i];
452 : }
453 : }
454 : // missing constants
455 0 : return kbt_ * ene;
456 : }
457 :
458 0 : void BAIES::calculate() {
459 : // fix PBCs
460 0 : if(pbc_) {
461 0 : makeWhole();
462 : }
463 :
464 : // get the positions at this step
465 0 : pos_ = getPositions();
466 :
467 : // reset derivatives
468 0 : #pragma omp parallel for num_threads(nt_)
469 : for(unsigned i=0; i<atom_der_.size(); ++i) {
470 : atom_der_[i] = Vector(0,0,0);
471 : }
472 :
473 : // calculate bAIes energy and derivatives
474 : double ene = 0.0;
475 : // Gaussian noise model
476 0 : if(mtype_=="gaussian") {
477 0 : switch(prior_) {
478 0 : case NONE:
479 0 : ene = getGauss();
480 : break;
481 0 : case JEFFREYS:
482 0 : ene = getGaussJeffreys();
483 : break;
484 0 : case CAUCHY:
485 0 : ene = getGaussCauchy();
486 : break;
487 : }
488 : } else {
489 0 : switch(prior_) {
490 0 : case NONE:
491 0 : ene = getLognorm();
492 : break;
493 0 : case JEFFREYS:
494 0 : ene = getLognormJeffreys();
495 : break;
496 0 : case CAUCHY:
497 0 : ene = getLognormCauchy();
498 : break;
499 : }
500 : }
501 : // set score
502 0 : Value* score = getPntrToComponent("ene");
503 : score->set(ene);
504 : // calculate virial
505 : Tensor virial;
506 : // declare omp reduction for Tensors
507 : #pragma omp declare reduction( sumTensor : Tensor : omp_out += omp_in )
508 :
509 0 : #pragma omp parallel for num_threads(nt_) reduction (sumTensor : virial)
510 : for(unsigned i=0; i<pos_.size(); ++i) {
511 : virial += Tensor(pos_[i], -atom_der_[i]);
512 : }
513 : // set virial
514 0 : setBoxDerivatives(score, virial);
515 : // set derivatives
516 0 : #pragma omp parallel for num_threads(nt_)
517 : for(unsigned i=0; i<atom_der_.size(); ++i) {
518 : setAtomsDerivatives(score, i, atom_der_[i]);
519 : }
520 0 : }
521 :
522 : }
523 : }
|