Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "colvar/Colvar.h"
24 : #include "core/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "tools/Communicator.h"
27 : #include "tools/Matrix.h"
28 : #include "core/GenericMolInfo.h"
29 : #include "core/ActionSet.h"
30 : #include "tools/File.h"
31 : #include "tools/OpenMP.h"
32 : #include <string>
33 : #include <iostream>
34 : #include <cmath>
35 : #include <map>
36 : #include <numeric>
37 : #include <ctime>
38 : #include "tools/Random.h"
39 :
40 : #ifndef M_PI
41 : #define M_PI 3.14159265358979323846
42 : #endif
43 :
44 : namespace PLMD {
45 : namespace isdb {
46 :
47 : //+PLUMEDOC ISDB_COLVAR BAIES
48 : /*
49 : Bayesian refinement of AF models.
50 :
51 : This action implements the Bayesian approach to refine AF models introduced here and here.
52 : It can be used to generate conformational ensembles of IDPs or refine AF models prior to small-molecule virtual screening.
53 :
54 : \par Examples
55 : ## Examples
56 :
57 : Complete tutorials can be found <a href="https://github.com/COSBlab/bAIes-IDP">here</a> and here.
58 :
59 : */
60 : //+ENDPLUMEDOC
61 :
62 : class BAIES : public Colvar {
63 :
64 : private:
65 :
66 : // bool pbc
67 : bool pbc_;
68 : // temperature in kbt
69 : double kbt_;
70 : // number of threads
71 : unsigned nt_;
72 : // positions
73 : std::vector<Vector> pos_;
74 : // derivatives
75 : std::vector<Vector> atom_der_;
76 : // AF2 parameters variables
77 : std::string mtype_;
78 : std::vector < std::pair<unsigned, unsigned> > atom_pairs_;
79 : std::vector<double> mus_;
80 : std::vector<double> sigmas_;
81 : // constants
82 : double inv_sqrt2_, sqrt2_pi_, inv_pi2_;
83 : // prior type
84 : unsigned prior_;
85 : enum { NONE, JEFFREYS, CAUCHY };
86 : // private functions
87 : void read_data_file(std::string datafile, std::vector<AtomNumber> atoms, double smin);
88 : // energie
89 : double getGauss();
90 : double getGaussJeffreys();
91 : double getGaussCauchy();
92 : double getLognorm();
93 : double getLognormJeffreys();
94 : double getLognormCauchy();
95 :
96 : public:
97 : static void registerKeywords( Keywords& keys );
98 : explicit BAIES(const ActionOptions&);
99 : // active methods:
100 : void calculate() override;
101 : };
102 :
103 : PLUMED_REGISTER_ACTION(BAIES,"BAIES")
104 :
105 2 : void BAIES::registerKeywords( Keywords& keys ) {
106 2 : Colvar::registerKeywords( keys );
107 4 : keys.add("atoms","ATOMS","atoms used in the calculation of bAIes energy");
108 4 : keys.add("compulsory","DATA_FILE","file with AF2 fit parameters");
109 4 : keys.add("compulsory","PRIOR", "type of prior to use (NONE, JEFFREYS, CAUCHY");
110 4 : keys.add("optional","TEMP", "temperature in kBt units");
111 4 : keys.add("optional","SIGMA_MIN", "minimum value of sigma");
112 4 : keys.addOutputComponent("ene","default","Bayesian bAIes energy");
113 2 : }
114 :
115 0 : BAIES::BAIES(const ActionOptions&ao):
116 : PLUMED_COLVAR_INIT(ao),
117 0 : pbc_(true) {
118 : // set constants
119 0 : inv_sqrt2_ = 1.0/sqrt(2.0);
120 0 : sqrt2_pi_ = sqrt(2.0 / M_PI);
121 0 : inv_pi2_ = 0.5 / M_PI / M_PI;
122 :
123 : // list of atoms
124 : std::vector<AtomNumber> atoms;
125 0 : parseAtomList("ATOMS", atoms);
126 :
127 : // file with AF2 fit parameters
128 : std::string datafile;
129 0 : parse("DATA_FILE", datafile);
130 :
131 : // prior type
132 : std::string prior;
133 0 : parse("PRIOR", prior);
134 : // check priors allowed
135 0 : if(prior=="NONE") {
136 0 : prior_ = NONE;
137 0 : } else if(prior=="JEFFREYS") {
138 0 : prior_ = JEFFREYS;
139 0 : } else if(prior=="CAUCHY") {
140 0 : prior_ = CAUCHY;
141 : } else {
142 0 : error("Unknown PRIOR type!");
143 : }
144 :
145 0 : bool nopbc=!pbc_;
146 0 : parseFlag("NOPBC",nopbc);
147 0 : pbc_=!nopbc;
148 :
149 : // set kbt
150 0 : kbt_ = plumed.getAtoms().getKbT();
151 0 : parse("TEMP", kbt_);
152 :
153 : // set sigma_min
154 0 : double sigma_min = -1.0;
155 0 : parse("SIGMA_MIN",sigma_min);
156 :
157 : // check read
158 0 : checkRead();
159 :
160 : // set parallel stuff
161 0 : unsigned mpisize=comm.Get_size();
162 0 : if(mpisize>1) {
163 0 : error("BAIES supports only OpenMP parallelization");
164 : }
165 : // set number of OpenMP threads
166 0 : nt_ = OpenMP::getNumThreads();
167 :
168 : // read the data file
169 0 : read_data_file(datafile, atoms, sigma_min);
170 :
171 : // print stuff to log
172 0 : log.printf(" number of atoms involved : %u\n", atoms.size());
173 0 : log.printf(" AF2 fit parameters file : %s\n", datafile.c_str());
174 0 : log.printf(" prior type : %s\n", prior.c_str());
175 : // print stuff from data file
176 0 : log.printf(" noise model type : %s\n", mtype_.c_str());
177 0 : log.printf(" number of pairs involved : %u\n", atom_pairs_.size());
178 : // other info
179 0 : log.printf(" temperature of the system in energy unit : %f\n", kbt_);
180 0 : log.printf(" minimum value of sigma : %f\n", sigma_min);
181 0 : if(pbc_) {
182 0 : log.printf(" using periodic boundary conditions\n");
183 : } else {
184 0 : log.printf(" without periodic boundary conditions\n");
185 : }
186 :
187 : // prepare other vectors: data and derivatives
188 0 : atom_der_.resize(atoms.size());
189 :
190 : // add components
191 0 : addComponentWithDerivatives("ene");
192 0 : componentIsNotPeriodic("ene");
193 :
194 : // request atoms
195 0 : requestAtoms(atoms);
196 :
197 : // print bibliography
198 0 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
199 0 : log<<plumed.cite("Schnapka, Morozova, Sen, Bonomi, bioRxiv (2025) doi: XXX");
200 0 : log<<"\n";
201 0 : }
202 :
203 0 : void BAIES::read_data_file(const std::string datafile, const std::vector<AtomNumber> atoms, double smin) {
204 : unsigned id, ai, aj;
205 : double mu, sigma;
206 : // map serials to index in position array
207 : std::map< unsigned, unsigned > index;
208 0 : for(unsigned i=0; i<atoms.size(); ++i) {
209 0 : index[atoms[i].serial()]=i;
210 : }
211 : // open file
212 0 : IFile *ifile = new IFile();
213 0 : if(ifile->FileExist(datafile)) {
214 0 : ifile->open(datafile);
215 : // read constant fields
216 0 : if( ifile->FieldExist("model") ) {
217 0 : ifile->scanField("model",mtype_);
218 0 : if( mtype_!="gaussian" && mtype_!="lognormal" ) {
219 0 : error("Unknown noise model type");
220 : }
221 : } else {
222 0 : error("Missing noise model type in DATA_FILE");
223 : }
224 : // read line-by-line
225 0 : while(ifile->scanField("Id",id)) {
226 0 : ifile->scanField("atom_i",ai);
227 0 : ifile->scanField("atom_j",aj);
228 0 : ifile->scanField("mu",mu);
229 0 : ifile->scanField("sigma",sigma);
230 : // list of pairs of atoms
231 0 : atom_pairs_.push_back(std::make_pair(index[ai],index[aj]));
232 : // list of mus
233 0 : mus_.push_back(mu);
234 : // list of sigmas
235 0 : sigmas_.push_back(std::max(smin,sigma));
236 : // new line
237 0 : ifile->scanField();
238 : }
239 0 : ifile->close();
240 : } else {
241 0 : error("Cannot find DATA_FILE "+datafile+"\n");
242 : }
243 0 : delete ifile;
244 0 : }
245 :
246 : // Energy models
247 0 : double BAIES::getGauss() {
248 : double ene = 0.0;
249 : // loop over atoms pairs
250 0 : #pragma omp parallel num_threads(nt_) shared(ene)
251 : {
252 : std::vector<Vector> omp_der(atom_der_.size());
253 : #pragma omp for reduction( + : ene) nowait
254 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
255 : // get indexes
256 : unsigned ii = atom_pairs_[i].first;
257 : unsigned jj = atom_pairs_[i].second;
258 : // get distance
259 : Vector distance=delta(pos_[ii], pos_[jj]);
260 : const double value=distance.modulo();
261 : const double invvalue=1.0/value;
262 : // get energy
263 : double arg = ( value - mus_[i] ) / sigmas_[i];
264 : ene += arg * arg;
265 : // calculate derivatives
266 : double arg_der = kbt_ * arg / sigmas_[i];
267 : omp_der[ii] += - arg_der * invvalue * distance;
268 : omp_der[jj] += arg_der * invvalue * distance;
269 : }
270 : // gather from tasks
271 : #pragma omp critical
272 : for(unsigned i=0; i<atom_der_.size(); ++i) {
273 : atom_der_[i]+=omp_der[i];
274 : }
275 : }
276 : // missing Gaussian normalization constant
277 0 : return 0.5 * kbt_ * ene;
278 : }
279 :
280 0 : double BAIES::getGaussJeffreys() {
281 : double ene = 0.0;
282 : // loop over all atom pairs
283 0 : #pragma omp parallel num_threads(nt_) shared(ene)
284 : {
285 : std::vector<Vector> omp_der(atom_der_.size());
286 : #pragma omp for reduction( + : ene) nowait
287 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
288 : // get indexes
289 : unsigned ii = atom_pairs_[i].first;
290 : unsigned jj = atom_pairs_[i].second;
291 : // get distance
292 : Vector distance=delta(pos_[ii], pos_[jj]);
293 : const double value=distance.modulo();
294 : const double invvalue=1.0/value;
295 : // get energy
296 : double val_mu = value - mus_[i];
297 : double inv_val_mu = 1.0 / val_mu;
298 : double inv_sigma = 1.0 / sigmas_[i];
299 : double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
300 : double erfval = std::erf(erfcontent);
301 : // increase energy
302 : ene += -std::log( erfval * inv_val_mu );
303 : // calculate derivatives
304 : double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
305 : // increase derivatives
306 : omp_der[ii] += - arg_der * invvalue * distance;
307 : omp_der[jj] += arg_der * invvalue * distance;
308 : }
309 : // gather from tasks
310 : #pragma omp critical
311 : for(unsigned i=0; i<atom_der_.size(); ++i) {
312 : atom_der_[i]+=omp_der[i];
313 : }
314 : }
315 : // missing normalization constants
316 0 : return kbt_ * ene;
317 : }
318 :
319 0 : double BAIES::getGaussCauchy() {
320 : double ene = 0.0;
321 : // loop over all atom pairs
322 0 : #pragma omp parallel num_threads(nt_) shared(ene)
323 : {
324 : std::vector<Vector> omp_der(atom_der_.size());
325 : #pragma omp for reduction( + : ene) nowait
326 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
327 : // get indexes
328 : unsigned ii = atom_pairs_[i].first;
329 : unsigned jj = atom_pairs_[i].second;
330 : // get distance
331 : Vector distance=delta(pos_[ii], pos_[jj]);
332 : const double value=distance.modulo();
333 : const double invvalue=1.0/value;
334 : // get energy
335 : double val_mu = value - mus_[i];
336 : double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
337 : ene += std::log( lncontent );
338 : // calculate derivatives
339 : double arg_der = kbt_ * val_mu / lncontent;
340 : omp_der[ii] += - arg_der * invvalue * distance;
341 : omp_der[jj] += arg_der * invvalue * distance;
342 : }
343 : // gather from tasks
344 : #pragma omp critical
345 : for(unsigned i=0; i<atom_der_.size(); ++i) {
346 : atom_der_[i]+=omp_der[i];
347 : }
348 : }
349 0 : return kbt_ * ene;
350 : }
351 :
352 0 : double BAIES::getLognorm() {
353 : double ene = 0.0;
354 : // loop over atoms pairs
355 0 : #pragma omp parallel num_threads(nt_) shared(ene)
356 : {
357 : std::vector<Vector> omp_der(atom_der_.size());
358 : #pragma omp for reduction( + : ene) nowait
359 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
360 : // get indexes
361 : unsigned ii = atom_pairs_[i].first;
362 : unsigned jj = atom_pairs_[i].second;
363 : // get distance
364 : Vector distance=delta(pos_[ii], pos_[jj]);
365 : const double value=distance.modulo();
366 : const double invvalue=1.0/value;
367 : // get energy
368 : double arg = ( std::log(value) - mus_[i] ) / sigmas_[i];
369 : ene += arg * arg;
370 : // calculate derivatives
371 : double arg_der = kbt_ * arg / (value * sigmas_[i]);
372 : omp_der[ii] += - arg_der * invvalue * distance;
373 : omp_der[jj] += arg_der * invvalue * distance;
374 : }
375 : // gather from tasks
376 : #pragma omp critical
377 : for(unsigned i=0; i<atom_der_.size(); ++i) {
378 : atom_der_[i]+=omp_der[i];
379 : }
380 : }
381 0 : return 0.5 * kbt_ * ene;
382 : }
383 :
384 0 : double BAIES::getLognormJeffreys() {
385 : double ene = 0.0;
386 : // loop over all atom pairs
387 0 : #pragma omp parallel num_threads(nt_) shared(ene)
388 : {
389 : std::vector<Vector> omp_der(atom_der_.size());
390 : #pragma omp for reduction( + : ene) nowait
391 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
392 : // get indexes
393 : unsigned ii = atom_pairs_[i].first;
394 : unsigned jj = atom_pairs_[i].second;
395 : // get distance
396 : Vector distance=delta(pos_[ii], pos_[jj]);
397 : const double value=distance.modulo();
398 : const double invvalue=1.0/value;
399 : // get energy
400 : double val_mu = std::log(value) - mus_[i];
401 : double inv_val_mu = 1 / val_mu;
402 : double inv_sigma = 1 / sigmas_[i];
403 : double erfcontent = val_mu * inv_sigma * inv_sqrt2_;
404 : double erfval = std::erf(erfcontent);
405 : // increase energy
406 : ene += -std::log( erfval * inv_val_mu );
407 : // calculate derivatives
408 : double arg_der = - kbt_ * val_mu * ( (sqrt2_pi_ * std::exp(-erfcontent * erfcontent) * inv_val_mu * inv_sigma ) - ( erfval * inv_val_mu * inv_val_mu ) ) / erfval ;
409 : // chain rule
410 : arg_der *= invvalue;
411 : // increase derivatives
412 : omp_der[ii] += - arg_der * invvalue * distance;
413 : omp_der[jj] += arg_der * invvalue * distance;
414 : }
415 : // gather from tasks
416 : #pragma omp critical
417 : for(unsigned i=0; i<atom_der_.size(); ++i) {
418 : atom_der_[i]+=omp_der[i];
419 : }
420 : }
421 : // missing constants
422 0 : return kbt_ * ene;
423 : }
424 :
425 0 : double BAIES::getLognormCauchy() {
426 : double ene = 0.0;
427 : // loop over all atom pairs
428 0 : #pragma omp parallel num_threads(nt_) shared(ene)
429 : {
430 : std::vector<Vector> omp_der(atom_der_.size());
431 : #pragma omp for reduction( + : ene) nowait
432 : for(unsigned i=0; i<atom_pairs_.size(); ++i) {
433 : // get indexes
434 : unsigned ii = atom_pairs_[i].first;
435 : unsigned jj = atom_pairs_[i].second;
436 : // get distance
437 : Vector distance=delta(pos_[ii], pos_[jj]);
438 : const double value=distance.modulo();
439 : const double invvalue=1.0/value;
440 : // get energy
441 : double val_mu = std::log(value) - mus_[i];
442 : double lncontent = 0.5*val_mu*val_mu + sigmas_[i]*sigmas_[i];
443 : ene += std::log( lncontent );
444 : // calculate derivatives
445 : double arg_der = kbt_ * val_mu * invvalue / lncontent;
446 : omp_der[ii] += - arg_der * invvalue * distance;
447 : omp_der[jj] += arg_der * invvalue * distance;
448 : }
449 : // gather from tasks
450 : #pragma omp critical
451 : for(unsigned i=0; i<atom_der_.size(); ++i) {
452 : atom_der_[i]+=omp_der[i];
453 : }
454 : }
455 : // missing constants
456 0 : return kbt_ * ene;
457 : }
458 :
459 0 : void BAIES::calculate() {
460 : // fix PBCs
461 0 : if(pbc_) {
462 0 : makeWhole();
463 : }
464 :
465 : // get the positions at this step
466 0 : pos_ = getPositions();
467 :
468 : // reset derivatives
469 0 : #pragma omp parallel for num_threads(nt_)
470 : for(unsigned i=0; i<atom_der_.size(); ++i) {
471 : atom_der_[i] = Vector(0,0,0);
472 : }
473 :
474 : // calculate bAIes energy and derivatives
475 : double ene = 0.0;
476 : // Gaussian noise model
477 0 : if(mtype_=="gaussian") {
478 0 : switch(prior_) {
479 0 : case NONE:
480 0 : ene = getGauss();
481 : break;
482 0 : case JEFFREYS:
483 0 : ene = getGaussJeffreys();
484 : break;
485 0 : case CAUCHY:
486 0 : ene = getGaussCauchy();
487 : break;
488 : }
489 : } else {
490 0 : switch(prior_) {
491 0 : case NONE:
492 0 : ene = getLognorm();
493 : break;
494 0 : case JEFFREYS:
495 0 : ene = getLognormJeffreys();
496 : break;
497 0 : case CAUCHY:
498 0 : ene = getLognormCauchy();
499 : break;
500 : }
501 : }
502 : // set score
503 0 : Value* score = getPntrToComponent("ene");
504 : score->set(ene);
505 : // calculate virial
506 0 : Tensor virial;
507 : // declare omp reduction for Tensors
508 : #pragma omp declare reduction( sumTensor : Tensor : omp_out += omp_in )
509 :
510 0 : #pragma omp parallel for num_threads(nt_) reduction (sumTensor : virial)
511 : for(unsigned i=0; i<pos_.size(); ++i) {
512 : virial += Tensor(pos_[i], -atom_der_[i]);
513 : }
514 : // set virial
515 0 : setBoxDerivatives(score, virial);
516 : // set derivatives
517 0 : #pragma omp parallel for num_threads(nt_)
518 : for(unsigned i=0; i<atom_der_.size(); ++i) {
519 : setAtomsDerivatives(score, i, atom_der_[i]);
520 : }
521 0 : }
522 :
523 : }
524 : }
|