Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2011-2018 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "PDB.h"
23 : #include "Tools.h"
24 : #include <cstdio>
25 : #include <iostream>
26 :
27 : using namespace std;
28 :
29 : //+PLUMEDOC INTERNAL pdbreader
30 : /*
31 : PLUMED can use the PDB format in several places
32 :
33 : - To read molecular structure (\ref MOLINFO).
34 : - To read reference conformations (\ref RMSD, but also many other methods in \ref dists, \ref FIT_TO_TEMPLATE, etc).
35 :
36 : The implemented PDB reader expects a file formatted correctly according to the
37 : [PDB standard](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html).
38 : In particular, the following columns are read from ATOM records
39 : \verbatim
40 : columns | content
41 : 1-6 | record name (ATOM or HETATM)
42 : 7-11 | serial number of the atom (starting from 1)
43 : 13-16 | atom name
44 : 18-20 | residue name
45 : 22 | chain id
46 : 23-26 | residue number
47 : 31-38 | x coordinate
48 : 39-46 | y coordinate
49 : 47-54 | z coordinate
50 : 55-60 | occupancy
51 : 61-66 | beta factor
52 : \endverbatim
53 : PLUMED parser is slightly more permissive than the official PDB format
54 : in the fact that the format of real numbers is not fixed. In other words,
55 : any parsable real number is ok and the dot can be placed anywhere. However,
56 : __columns are interpret strictly__. A sample PDB should look like the following
57 : \verbatim
58 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
59 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
60 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
61 : \endverbatim
62 :
63 : Notice that serial numbers need not to be consecutive. In the three-line example above,
64 : only the coordinates of three atoms are provided. This is perfectly legal and indicates PLUMED
65 : that information about these atoms only is available. This could be both for structural
66 : information in \ref MOLINFO, where the other atoms would have no name assigned, and for
67 : reference structures used in \ref RMSD, where only the provided atoms would be used to compute RMSD.
68 :
69 : \par Occupancy and beta factors
70 :
71 : PLUMED reads also occupancy and beta factors that however are given a very special meaning.
72 : In cases where the PDB structure is used as a reference for an alignment (that's the case
73 : for instance in \ref RMSD and in \ref FIT_TO_TEMPLATE), the occupancy column is used
74 : to provide the weight of each atom in the alignment. In cases where, perhaps after alignment,
75 : the displacement between running coordinates and the provided PDB is computed, the beta factors
76 : are used as weight for the displacement.
77 : Since setting the weights to zero is the same as __not__ including an atom in the alignement or
78 : displacement calculation, the two following reference files would be equivalent when used in an \ref RMSD
79 : calculation. First file:
80 : \verbatim
81 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
82 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
83 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 0.00 0.00
84 : \endverbatim
85 : Second file:
86 : \verbatim
87 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
88 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
89 : \endverbatim
90 : However notice that many extra atoms with zero weight might slow down the calculation, so
91 : removing lines is better than setting their weights to zero.
92 : In addition, weights for alignment need not to be equivalent to weights for displacement.
93 :
94 :
95 : */
96 : //+ENDPLUMEDOC
97 :
98 :
99 : namespace PLMD {
100 :
101 393 : unsigned PDB::getNumberOfAtomBlocks()const {
102 393 : return block_ends.size();
103 : }
104 :
105 18 : const std::vector<unsigned> & PDB::getAtomBlockEnds()const {
106 18 : return block_ends;
107 : }
108 :
109 8108 : const std::vector<Vector> & PDB::getPositions()const {
110 8108 : return positions;
111 : }
112 :
113 0 : void PDB::setPositions(const std::vector<Vector> &v ) {
114 0 : plumed_assert( v.size()==positions.size() );
115 0 : positions=v;
116 0 : }
117 :
118 11306 : const std::vector<double> & PDB::getOccupancy()const {
119 11306 : return occupancy;
120 : }
121 :
122 11306 : const std::vector<double> & PDB::getBeta()const {
123 11306 : return beta;
124 : }
125 :
126 1104 : const std::vector<std::string> & PDB::getRemark()const {
127 1104 : return remark;
128 : }
129 :
130 686 : void PDB::addRemark( const std::vector<std::string>& v1 ) {
131 686 : remark.insert(remark.begin(),v1.begin(),v1.end());
132 686 : }
133 :
134 17166 : const std::vector<AtomNumber> & PDB::getAtomNumbers()const {
135 17166 : return numbers;
136 : }
137 :
138 616520 : std::string PDB::getAtomName(AtomNumber a)const {
139 616520 : std::map<AtomNumber,unsigned>::const_iterator p;
140 616520 : p=number2index.find(a);
141 616520 : if(p==number2index.end()) return "";
142 616518 : else return atomsymb[p->second];
143 : }
144 :
145 34142 : unsigned PDB::getResidueNumber(AtomNumber a)const {
146 34142 : std::map<AtomNumber,unsigned>::const_iterator p;
147 34142 : p=number2index.find(a);
148 34142 : if(p==number2index.end()) return 0;
149 34140 : else return residue[p->second];
150 : }
151 :
152 34142 : std::string PDB::getResidueName(AtomNumber a) const {
153 34142 : std::map<AtomNumber,unsigned>::const_iterator p;
154 34142 : p=number2index.find(a);
155 34142 : if(p==number2index.end()) return "";
156 34140 : else return residuenames[p->second];
157 : }
158 :
159 50442108 : unsigned PDB::size()const {
160 50442108 : return positions.size();
161 : }
162 :
163 1485 : bool PDB::readFromFilepointer(FILE *fp,bool naturalUnits,double scale) {
164 : //cerr<<file<<endl;
165 1485 : bool file_is_alive=false;
166 1485 : if(naturalUnits) scale=1.0;
167 1485 : string line;
168 1485 : fpos_t pos; bool between_ters=true;
169 67713 : while(Tools::getline(fp,line)) {
170 : //cerr<<line<<"\n";
171 66091 : fgetpos (fp,&pos);
172 66091 : while(line.length()<80) line.push_back(' ');
173 66091 : string record=line.substr(0,6);
174 130834 : string serial=line.substr(6,5);
175 130834 : string atomname=line.substr(12,4);
176 130834 : string residuename=line.substr(17,3);
177 130834 : string chainID=line.substr(21,1);
178 130834 : string resnum=line.substr(22,4);
179 130834 : string x=line.substr(30,8);
180 130834 : string y=line.substr(38,8);
181 130834 : string z=line.substr(46,8);
182 130834 : string occ=line.substr(54,6);
183 130834 : string bet=line.substr(60,6);
184 66091 : Tools::trim(record);
185 66091 : if(record=="TER") { between_ters=false; block_ends.push_back( positions.size() ); }
186 66091 : if(record=="END") { file_is_alive=true; break;}
187 64786 : if(record=="ENDMDL") { file_is_alive=true; break;}
188 64743 : if(record=="REMARK") {
189 682 : vector<string> v1; v1=Tools::getWords(line.substr(6));
190 682 : addRemark( v1 );
191 : }
192 64743 : if(record=="ATOM" || record=="HETATM") {
193 63842 : between_ters=true;
194 63842 : AtomNumber a; unsigned resno;
195 : double o,b;
196 63842 : Vector p;
197 63842 : Tools::convert(serial,a);
198 63842 : Tools::convert(resnum,resno);
199 63842 : Tools::convert(occ,o);
200 63842 : Tools::convert(bet,b);
201 63842 : Tools::convert(x,p[0]);
202 63842 : Tools::convert(y,p[1]);
203 63842 : Tools::convert(z,p[2]);
204 : // scale into nm
205 63842 : p*=scale;
206 63842 : numbers.push_back(a);
207 63842 : number2index[a]=positions.size();
208 63842 : std::size_t startpos=atomname.find_first_not_of(" \t");
209 63842 : std::size_t endpos=atomname.find_last_not_of(" \t");
210 63842 : atomsymb.push_back( atomname.substr(startpos, endpos-startpos+1) );
211 63842 : residue.push_back(resno);
212 63842 : chain.push_back(chainID);
213 63842 : occupancy.push_back(o);
214 63842 : beta.push_back(b);
215 63842 : positions.push_back(p);
216 63842 : residuenames.push_back(residuename);
217 : }
218 64743 : }
219 1485 : if( between_ters ) block_ends.push_back( positions.size() );
220 1485 : return file_is_alive;
221 : }
222 :
223 0 : void PDB::setArgKeyword( const std::string& new_args ) {
224 0 : bool replaced=false;
225 0 : for(unsigned i=0; i<remark.size(); ++i) {
226 0 : if( remark[i].find("ARG=")!=std::string::npos) {
227 0 : remark[i]=new_args; replaced=true;
228 : }
229 : }
230 0 : plumed_assert( replaced );
231 0 : }
232 :
233 162 : bool PDB::read(const std::string&file,bool naturalUnits,double scale) {
234 162 : FILE* fp=fopen(file.c_str(),"r");
235 162 : if(!fp) return false;
236 162 : readFromFilepointer(fp,naturalUnits,scale);
237 162 : fclose(fp);
238 162 : return true;
239 : }
240 :
241 66 : void PDB::getChainNames( std::vector<std::string>& chains ) const {
242 66 : chains.resize(0);
243 66 : chains.push_back( chain[0] );
244 122959 : for(unsigned i=1; i<size(); ++i) {
245 122893 : if( chains[chains.size()-1]!=chain[i] ) chains.push_back( chain[i] );
246 : }
247 66 : }
248 :
249 412 : void PDB::getResidueRange( const std::string& chainname, unsigned& res_start, unsigned& res_end, std::string& errmsg ) const {
250 412 : bool inres=false, foundchain=false;
251 991897 : for(unsigned i=0; i<size(); ++i) {
252 991485 : if( chain[i]==chainname ) {
253 75943 : if(!inres) {
254 412 : if(foundchain) errmsg="found second start of chain named " + chainname;
255 412 : res_start=residue[i];
256 : }
257 75943 : inres=true; foundchain=true;
258 915542 : } else if( inres && chain[i]!=chainname ) {
259 364 : inres=false;
260 364 : res_end=residue[i-1];
261 : }
262 : }
263 412 : if(inres) res_end=residue[size()-1];
264 412 : }
265 :
266 109 : void PDB::getAtomRange( const std::string& chainname, AtomNumber& a_start, AtomNumber& a_end, std::string& errmsg ) const {
267 109 : bool inres=false, foundchain=false;
268 237406 : for(unsigned i=0; i<size(); ++i) {
269 237297 : if( chain[i]==chainname ) {
270 61531 : if(!inres) {
271 109 : if(foundchain) errmsg="found second start of chain named " + chainname;
272 109 : a_start=numbers[i];
273 : }
274 61531 : inres=true; foundchain=true;
275 175766 : } else if( inres && chain[i]!=chainname ) {
276 70 : inres=false;
277 70 : a_end=numbers[i-1];
278 : }
279 : }
280 109 : if(inres) a_end=numbers[size()-1];
281 109 : }
282 :
283 7364 : std::string PDB::getResidueName( const unsigned& resnum ) const {
284 9022831 : for(unsigned i=0; i<size(); ++i) {
285 9022831 : if( residue[i]==resnum ) return residuenames[i];
286 : }
287 0 : return "";
288 : }
289 :
290 4564 : std::string PDB::getResidueName(const unsigned& resnum,const std::string& chainid ) const {
291 5806339 : for(unsigned i=0; i<size(); ++i) {
292 5806339 : if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) return residuenames[i];
293 : }
294 0 : return "";
295 : }
296 :
297 :
298 13260 : AtomNumber PDB::getNamedAtomFromResidue( const std::string& aname, const unsigned& resnum ) const {
299 16347180 : for(unsigned i=0; i<size(); ++i) {
300 16347180 : if( residue[i]==resnum && atomsymb[i]==aname ) return numbers[i];
301 : }
302 0 : std::string num; Tools::convert( resnum, num );
303 0 : plumed_merror("residue " + num + " does not contain an atom named " + aname );
304 0 : return numbers[0]; // This is to stop compiler errors
305 : }
306 :
307 1195 : AtomNumber PDB::getNamedAtomFromResidueAndChain( const std::string& aname, const unsigned& resnum, const std::string& chainid ) const {
308 1003391 : for(unsigned i=0; i<size(); ++i) {
309 1003391 : if( residue[i]==resnum && atomsymb[i]==aname && ( chainid=="*" || chain[i]==chainid) ) return numbers[i];
310 : }
311 0 : std::string num; Tools::convert( resnum, num );
312 0 : plumed_merror("residue " + num + " from chain " + chainid + " does not contain an atom named " + aname );
313 0 : return numbers[0]; // This is to stop compiler errors
314 : }
315 :
316 4266 : std::vector<AtomNumber> PDB::getAtomsInResidue(const unsigned& resnum,const std::string& chainid)const {
317 4266 : std::vector<AtomNumber> tmp;
318 11147058 : for(unsigned i=0; i<size(); ++i) {
319 11142792 : if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) tmp.push_back(numbers[i]);
320 : }
321 4266 : if(tmp.size()==0) {
322 0 : std::string num; Tools::convert( resnum, num );
323 0 : plumed_merror("Cannot find residue " + num + " from chain " + chainid );
324 : }
325 4266 : return tmp;
326 : }
327 :
328 12 : std::vector<AtomNumber> PDB::getAtomsInChain(const std::string& chainid)const {
329 12 : std::vector<AtomNumber> tmp;
330 31356 : for(unsigned i=0; i<size(); ++i) {
331 31344 : if( chainid=="*" || chain[i]==chainid ) tmp.push_back(numbers[i]);
332 : }
333 12 : if(tmp.size()==0) {
334 0 : plumed_merror("Cannot find atoms from chain " + chainid );
335 : }
336 12 : return tmp;
337 : }
338 :
339 4638 : std::string PDB::getChainID(const unsigned& resnumber) const {
340 5689172 : for(unsigned i=0; i<size(); ++i) {
341 5693810 : if(resnumber==residue[i]) return chain[i];
342 : }
343 0 : plumed_merror("Not enough residues in pdb input file");
344 : }
345 :
346 0 : bool PDB::checkForResidue( const std::string& name ) const {
347 0 : for(unsigned i=0; i<size(); ++i) {
348 0 : if( residuenames[i]==name ) return true;
349 : }
350 0 : return false;
351 : }
352 :
353 0 : bool PDB::checkForAtom( const std::string& name ) const {
354 0 : for(unsigned i=0; i<size(); ++i) {
355 0 : if( atomsymb[i]==name ) return true;
356 : }
357 0 : return false;
358 : }
359 :
360 0 : Log& operator<<(Log& ostr, const PDB& pdb) {
361 : char buffer[1000];
362 0 : for(unsigned i=0; i<pdb.positions.size(); i++) {
363 0 : sprintf(buffer,"ATOM %3d %8.3f %8.3f %8.3f\n",pdb.numbers[i].serial(),pdb.positions[i][0],pdb.positions[i][1],pdb.positions[i][2]);
364 0 : ostr<<buffer;
365 : }
366 0 : return ostr;
367 : }
368 :
369 852 : Vector PDB::getPosition(AtomNumber a)const {
370 852 : std::map<AtomNumber,unsigned>::const_iterator p;
371 852 : p=number2index.find(a);
372 852 : if(p==number2index.end()) plumed_merror("atom not available");
373 852 : else return positions[p->second];
374 : }
375 :
376 :
377 :
378 2523 : }
379 :
|