001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar. 6, 2014 021 * 022 */ 023package org.biojava.nbio.structure.io; 024 025import org.biojava.nbio.structure.*; 026import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; 027import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; 028import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 029import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond; 030import org.biojava.nbio.structure.io.mmcif.model.StructConn; 031import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.util.ArrayList; 036import java.util.HashMap; 037import java.util.HashSet; 038import java.util.List; 039import java.util.Map; 040import java.util.Set; 041 042/** 043 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and 044 * intra-group (residue) bonds based on data from the Chemical Component Dictionary 045 * to the Structure object. 046 * 047 * TODO the current implementation adds bonds to the first model only. This 048 * should be sufficient for homogeneous models, but here are a few inhomogeneous models 049 * in the PDB. A better handling of models should be considered in the future. 050 * 051 * @author Peter Rose 052 * @author Ulysse Carion 053 * 054 */ 055public class BondMaker { 056 057 058 private static final Logger logger = LoggerFactory.getLogger(BondMaker.class); 059 060 /** 061 * The types of bonds that are read from struct_conn (type specified in field conn_type_id) 062 */ 063 public static final Set<String> BOND_TYPES_TO_PARSE; 064 static { 065 BOND_TYPES_TO_PARSE = new HashSet<>(); 066 BOND_TYPES_TO_PARSE.add("disulf"); 067 BOND_TYPES_TO_PARSE.add("covale"); 068 BOND_TYPES_TO_PARSE.add("covale_base"); 069 BOND_TYPES_TO_PARSE.add("covale_phosphate"); 070 BOND_TYPES_TO_PARSE.add("covale_sugar"); 071 BOND_TYPES_TO_PARSE.add("modres"); 072 } 073 074 075 /** 076 * Maximum peptide (C - N) bond length considered for bond formation 077 */ 078 private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8; 079 /** 080 * Maximum nucleotide (P - O3') bond length considered for bond formation 081 */ 082 private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1; 083 084 private Structure structure; 085 private FileParsingParameters params; 086 087 public BondMaker(Structure structure, FileParsingParameters params) { 088 this.structure = structure; 089 this.params = params; 090 } 091 092 /** 093 * Creates bond objects and corresponding references in Atom objects: 094 * <li> 095 * peptide bonds: inferred from sequence and distances 096 * </li> 097 * <li> 098 * nucleotide bonds: inferred from sequence and distances 099 * </li> 100 * <li> 101 * intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider} 102 * </li> 103 */ 104 public void makeBonds() { 105 logger.debug("Going to start making bonds"); 106 formPeptideBonds(); 107 formNucleotideBonds(); 108 formIntraResidueBonds(); 109 trimBondLists(); 110 } 111 112 private void formPeptideBonds() { 113 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 114 for (Chain chain : structure.getChains(modelInd)) { 115 List<Group> groups = chain.getSeqResGroups(); 116 117 for (int i = 0; i < groups.size() - 1; i++) { 118 if (!(groups.get(i) instanceof AminoAcidImpl) 119 || !(groups.get(i + 1) instanceof AminoAcidImpl)) 120 continue; 121 122 AminoAcidImpl tail = (AminoAcidImpl) groups.get(i); 123 AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1); 124 125 // atoms with no residue number don't have atom information 126 if (tail.getResidueNumber() == null 127 || head.getResidueNumber() == null) { 128 continue; 129 } 130 131 Atom carboxylC; 132 Atom aminoN; 133 134 carboxylC = tail.getC(); 135 aminoN = head.getN(); 136 137 138 if (carboxylC == null || aminoN == null) { 139 // some structures may be incomplete and not store info 140 // about all of their atoms 141 142 continue; 143 } 144 145 146 if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { 147 new BondImpl(carboxylC, aminoN, 1); 148 } 149 150 } 151 } 152 } 153 } 154 155 private void formNucleotideBonds() { 156 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 157 for (Chain chain : structure.getChains(modelInd)) { 158 List<Group> groups = chain.getSeqResGroups(); 159 160 for (int i = 0; i < groups.size() - 1; i++) { 161 if (!(groups.get(i) instanceof NucleotideImpl) 162 || !(groups.get(i + 1) instanceof NucleotideImpl)) 163 continue; 164 165 NucleotideImpl tail = (NucleotideImpl) groups.get(i); 166 NucleotideImpl head = (NucleotideImpl) groups.get(i + 1); 167 168 // atoms with no residue number don't have atom information 169 if (tail.getResidueNumber() == null 170 || head.getResidueNumber() == null) { 171 continue; 172 } 173 174 Atom phosphorous = head.getP(); 175 Atom oThreePrime = tail.getO3Prime(); 176 177 if (phosphorous == null || oThreePrime == null) { 178 continue; 179 } 180 181 182 if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) { 183 new BondImpl(phosphorous, oThreePrime, 1); 184 } 185 186 } 187 } 188 } 189 } 190 191 private void formIntraResidueBonds() { 192 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 193 for (Chain chain : structure.getChains(modelInd)) { 194 List<Group> groups = chain.getAtomGroups(); 195 for (Group mainGroup : groups) { 196 // atoms with no residue number don't have atom information 197 if (mainGroup.getResidueNumber() == null) { 198 continue; 199 } 200 // Now add support for altLocGroup 201 List<Group> totList = new ArrayList<Group>(); 202 totList.add(mainGroup); 203 for(Group altLoc: mainGroup.getAltLocs()){ 204 totList.add(altLoc); 205 } 206 207 208 // Now iterate through this list 209 for(Group group : totList){ 210 211 ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName()); 212 logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds", 213 group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); 214 215 for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { 216 Atom a = getAtom(chemCompBond.getAtom_id_1(), group); 217 Atom b = getAtom(chemCompBond.getAtom_id_2(), group); 218 if ( a != null && b != null){ 219 int bondOrder = chemCompBond.getNumericalBondOrder(); 220 logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", 221 a.getPDBserial(), a.getName(), b.getPDBserial(), b.getName(), bondOrder); 222 new BondImpl(a, b, bondOrder); 223 } 224 else{ 225 // Some of the atoms were missing. That's fine, there's 226 // nothing to do in this case. 227 } 228 } 229 } 230 } 231 } 232 233 } 234 } 235 236 private Atom getAtom(String atomId, Group group) { 237 Atom a = group.getAtom(atomId); 238 // Check for deuteration 239 if(a==null && atomId.startsWith("H")) { 240 a = group.getAtom(atomId.replaceFirst("H", "D")); 241 // Check it is actually deuterated 242 if(a!=null){ 243 if(!a.getElement().equals(Element.D)){ 244 a=null; 245 } 246 } 247 } 248 return a; 249 } 250 251 private void trimBondLists() { 252 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 253 for (Chain chain : structure.getChains(modelInd)) { 254 for (Group group : chain.getAtomGroups()) { 255 for (Atom atom : group.getAtoms()) { 256 if (atom.getBonds()!=null && atom.getBonds().size() > 0) { 257 ((ArrayList<Bond>) atom.getBonds()).trimToSize(); 258 } 259 } 260 } 261 } 262 } 263 } 264 265 /** 266 * Creates disulfide bond objects and references in the corresponding Atoms objects, given 267 * a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file. 268 * @param disulfideBonds 269 */ 270 public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) { 271 for (SSBondImpl disulfideBond : disulfideBonds) { 272 formDisulfideBond(disulfideBond); 273 } 274 } 275 276 private void formDisulfideBond(SSBondImpl disulfideBond) { 277 try { 278 Map<Integer, Atom> a = getAtomFromRecord("SG", "", "CYS", 279 disulfideBond.getChainID1(), disulfideBond.getResnum1(), 280 disulfideBond.getInsCode1()); 281 Map<Integer, Atom> b = getAtomFromRecord("SG", "", "CYS", 282 disulfideBond.getChainID2(), disulfideBond.getResnum2(), 283 disulfideBond.getInsCode2()); 284 285 for(int i=0; i<structure.nrModels(); i++){ 286 if(a.containsKey(i) && b.containsKey(i)){ 287 // TODO determine what the actual bond order of this bond is; for 288 // now, we're assuming they're single bonds 289 if(!a.get(i).equals(b.get(i))){ 290 Bond ssbond = new BondImpl(a.get(i), b.get(i), 1); 291 structure.addSSBond(ssbond); 292 } 293 } 294 } 295 296 297 } catch (StructureException e) { 298 // Note, in Calpha only mode the CYS SG's are not present. 299 if (! params.isParseCAOnly()) { 300 logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString()); 301 } else { 302 logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode."); 303 } 304 } 305 } 306 307 /** 308 * Creates bond objects from a LinkRecord as parsed from a PDB file 309 * @param linkRecord 310 */ 311 public void formLinkRecordBond(LinkRecord linkRecord) { 312 // only work with atoms that aren't alternate locations 313 if (linkRecord.getAltLoc1().equals(" ") 314 || linkRecord.getAltLoc2().equals(" ")) 315 return; 316 317 try { 318 Map<Integer, Atom> a = getAtomFromRecord(linkRecord.getName1(), 319 linkRecord.getAltLoc1(), linkRecord.getResName1(), 320 linkRecord.getChainID1(), linkRecord.getResSeq1(), 321 linkRecord.getiCode1()); 322 323 Map<Integer, Atom> b = getAtomFromRecord(linkRecord.getName2(), 324 linkRecord.getAltLoc2(), linkRecord.getResName2(), 325 linkRecord.getChainID2(), linkRecord.getResSeq2(), 326 linkRecord.getiCode2()); 327 328 for(int i=0; i<structure.nrModels(); i++){ 329 if(a.containsKey(i) && b.containsKey(i)){ 330 // TODO determine what the actual bond order of this bond is; for 331 // now, we're assuming they're single bonds 332 if(!a.get(i).equals(b.get(i))){ 333 new BondImpl(a.get(i), b.get(i), 1); 334 } 335 } 336 } 337 }catch (StructureException e) { 338 // Note, in Calpha only mode the link atoms may not be present. 339 if (! params.isParseCAOnly()) { 340 logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString()); 341 } else { 342 logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode."); 343 } 344 345 } 346 } 347 348 349 public void formBondsFromStructConn(List<StructConn> structConn) { 350 351 final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit. 352 353 List<Bond> ssbonds = new ArrayList<>(); 354 355 for (StructConn conn : structConn) { 356 357 if (!BOND_TYPES_TO_PARSE.contains(conn.getConn_type_id())) continue; 358 String chainId1; 359 String chainId2; 360 361 chainId1 = conn.getPtnr1_label_asym_id(); 362 chainId2 = conn.getPtnr2_label_asym_id(); 363 364 String insCode1 = ""; 365 if (!conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code(); 366 String insCode2 = ""; 367 if (!conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code(); 368 369 String seqId1 = conn.getPtnr1_auth_seq_id(); 370 String seqId2 = conn.getPtnr2_auth_seq_id(); 371 String resName1 = conn.getPtnr1_label_comp_id(); 372 String resName2 = conn.getPtnr2_label_comp_id(); 373 String atomName1 = conn.getPtnr1_label_atom_id(); 374 String atomName2 = conn.getPtnr2_label_atom_id(); 375 String altLoc1 = ""; 376 if (!conn.getPdbx_ptnr1_label_alt_id().equals("?")) altLoc1 = conn.getPdbx_ptnr1_label_alt_id(); 377 String altLoc2 = ""; 378 if (!conn.getPdbx_ptnr2_label_alt_id().equals("?")) altLoc2 = conn.getPdbx_ptnr2_label_alt_id(); 379 380 // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules. 381 if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) { 382 logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet", 383 atomName1, seqId1, insCode1, atomName2, seqId2, insCode2); 384 continue; 385 } 386 387 388 String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")"; 389 String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")"; 390 391 Map<Integer,Atom> a1 = null; 392 Map<Integer,Atom> a2 = null; 393 394 try { 395 a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1); 396 397 } catch (StructureException e) { 398 399 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1); 400 continue; 401 } 402 try { 403 a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2); 404 } catch (StructureException e) { 405 406 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2); 407 continue; 408 } 409 410 if (a1==null) { 411 // we couldn't find the atom, something must be wrong with the file 412 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1); 413 continue; 414 } 415 if (a2==null) { 416 // we couldn't find the atom, something must be wrong with the file 417 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2); 418 continue; 419 } 420 421 // assuming order 1 for all bonds, no information is provided by struct_conn 422 for(int i=0; i<structure.nrModels(); i++){ 423 Bond bond = null; 424 if(a1.containsKey(i) && a2.containsKey(i)){ 425 if(!a1.get(i).equals(a2.get(i))){ 426 bond = new BondImpl(a1.get(i), a2.get(i), 1); 427 } 428 } 429 if(bond!=null){ 430 if (conn.getConn_type_id().equals("disulf")) { 431 ssbonds.add(bond); 432 } 433 } 434 } 435 436 } 437 438 // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds 439 structure.setSSBonds(ssbonds); 440 } 441 442 private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode) 443 throws StructureException { 444 445 if (iCode==null || iCode.isEmpty()) { 446 iCode = " "; // an insertion code of ' ' is ignored 447 } 448 Map<Integer, Atom> outMap = new HashMap<>(); 449 ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0)); 450 451 for (int i=0; i<structure.nrModels(); i++){ 452 Chain chain = structure.getChain(chainID,i); 453 Group group = chain.getGroupByPDB(resNum); 454 455 Group g = group; 456 // there is an alternate location 457 if (!altLoc.isEmpty()) { 458 g = group.getAltLocGroup(altLoc.charAt(0)); 459 if (g==null) 460 throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID); 461 } 462 Atom a = g.getAtom(name); 463 if (a!=null){ 464 outMap.put(i,a); 465 } 466 } 467 return outMap; 468 } 469}