001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar. 6, 2014 021 * 022 */ 023package org.biojava.nbio.structure.io; 024 025import org.biojava.nbio.structure.*; 026import org.biojava.nbio.structure.chem.ChemComp; 027import org.biojava.nbio.structure.chem.ChemCompBond; 028import org.biojava.nbio.structure.chem.ChemCompGroupFactory; 029import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord; 030import org.rcsb.cif.model.ValueKind; 031import org.rcsb.cif.schema.mm.StructConn; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.util.ArrayList; 036import java.util.HashMap; 037import java.util.HashSet; 038import java.util.List; 039import java.util.Map; 040import java.util.Set; 041 042/** 043 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and 044 * intra-group (residue) bonds based on data from the Chemical Component Dictionary 045 * to the Structure object. 046 * 047 * TODO the current implementation adds bonds to the first model only. This 048 * should be sufficient for homogeneous models, but here are a few inhomogeneous models 049 * in the PDB. A better handling of models should be considered in the future. 050 * 051 * @author Peter Rose 052 * @author Ulysse Carion 053 * 054 */ 055public class BondMaker { 056 private static final Logger logger = LoggerFactory.getLogger(BondMaker.class); 057 058 /** 059 * The types of bonds that are read from struct_conn (type specified in field conn_type_id) 060 */ 061 public static final Set<String> BOND_TYPES_TO_PARSE; 062 static { 063 BOND_TYPES_TO_PARSE = new HashSet<>(); 064 BOND_TYPES_TO_PARSE.add("disulf"); 065 BOND_TYPES_TO_PARSE.add("covale"); 066 BOND_TYPES_TO_PARSE.add("covale_base"); 067 BOND_TYPES_TO_PARSE.add("covale_phosphate"); 068 BOND_TYPES_TO_PARSE.add("covale_sugar"); 069 BOND_TYPES_TO_PARSE.add("modres"); 070 } 071 072 073 /** 074 * Maximum peptide (C - N) bond length considered for bond formation 075 */ 076 private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8; 077 /** 078 * Maximum nucleotide (P - O3') bond length considered for bond formation 079 */ 080 private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1; 081 082 private final Structure structure; 083 private final FileParsingParameters params; 084 085 public BondMaker(Structure structure, FileParsingParameters params) { 086 this.structure = structure; 087 this.params = params; 088 } 089 090 /** 091 * Creates bond objects and corresponding references in Atom objects: 092 * <li> 093 * peptide bonds: inferred from sequence and distances 094 * </li> 095 * <li> 096 * nucleotide bonds: inferred from sequence and distances 097 * </li> 098 * <li> 099 * intra-group (residue) bonds: read from the chemical component dictionary, via {@link org.biojava.nbio.structure.chem.ChemCompProvider} 100 * </li> 101 */ 102 public void makeBonds() { 103 logger.debug("Going to start making bonds"); 104 formPeptideBonds(); 105 formNucleotideBonds(); 106 formIntraResidueBonds(); 107 trimBondLists(); 108 } 109 110 private void formPeptideBonds() { 111 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 112 for (Chain chain : structure.getChains(modelInd)) { 113 List<Group> groups = chain.getSeqResGroups(); 114 115 for (int i = 0; i < groups.size() - 1; i++) { 116 if (!(groups.get(i) instanceof AminoAcidImpl) 117 || !(groups.get(i + 1) instanceof AminoAcidImpl)) 118 continue; 119 120 AminoAcidImpl tail = (AminoAcidImpl) groups.get(i); 121 AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1); 122 123 // atoms with no residue number don't have atom information 124 if (tail.getResidueNumber() == null 125 || head.getResidueNumber() == null) { 126 continue; 127 } 128 129 formBondAltlocAware(tail, "C", head, "N", MAX_PEPTIDE_BOND_LENGTH, 1); 130 } 131 } 132 } 133 } 134 135 private void formNucleotideBonds() { 136 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 137 for (Chain chain : structure.getChains(modelInd)) { 138 List<Group> groups = chain.getSeqResGroups(); 139 140 for (int i = 0; i < groups.size() - 1; i++) { 141 if (!(groups.get(i) instanceof NucleotideImpl) 142 || !(groups.get(i + 1) instanceof NucleotideImpl)) 143 continue; 144 145 NucleotideImpl tail = (NucleotideImpl) groups.get(i); 146 NucleotideImpl head = (NucleotideImpl) groups.get(i + 1); 147 148 // atoms with no residue number don't have atom information 149 if (tail.getResidueNumber() == null 150 || head.getResidueNumber() == null) { 151 continue; 152 } 153 154 formBondAltlocAware(head, "P", tail, "O3'", MAX_NUCLEOTIDE_BOND_LENGTH, 1); 155 } 156 } 157 } 158 } 159 160 private void formIntraResidueBonds() { 161 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 162 for (Chain chain : structure.getChains(modelInd)) { 163 List<Group> groups = chain.getAtomGroups(); 164 for (Group mainGroup : groups) { 165 // atoms with no residue number don't have atom information 166 if (mainGroup.getResidueNumber() == null) { 167 continue; 168 } 169 // Now add support for altLocGroup 170 List<Group> totList = new ArrayList<>(); 171 totList.add(mainGroup); 172 totList.addAll(mainGroup.getAltLocs()); 173 174 // Now iterate through this list 175 for(Group group : totList){ 176 177 ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName()); 178 logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds", 179 group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); 180 181 for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { 182 // note we don't check distance to make this call not too expensive 183 formBondAltlocAware(group, chemCompBond.getAtomId1(), 184 group, chemCompBond.getAtomId2(), -1, chemCompBond.getNumericalBondOrder()); 185 } 186 } 187 } 188 } 189 190 } 191 } 192 193 /** 194 * Form bond between atoms of the given names and groups, respecting alt loc rules to form bonds: 195 * no bonds between differently named alt locs (that are not the default alt loc '.') 196 * and multiple bonds for default alt loc to named alt loc. 197 * @param g1 first group 198 * @param name1 name of atom in first group 199 * @param g2 second group 200 * @param name2 name of atom in second group 201 * @param maxAllowedLength max length, if atoms distance above this length no bond will be added. If negative no check on distance is performed. 202 * @param bondOrder the bond order to be set in the created bond(s) 203 */ 204 private void formBondAltlocAware(Group g1, String name1, Group g2, String name2, double maxAllowedLength, int bondOrder) { 205 List<Atom> a1s = getAtoms(g1, name1); 206 List<Atom> a2s = getAtoms(g2, name2); 207 208 if (a1s.isEmpty() || a2s.isEmpty()) { 209 // some structures may be incomplete and not store info 210 // about all of their atoms 211 return; 212 } 213 214 for (Atom a1:a1s) { 215 for (Atom a2:a2s) { 216 if (a1.getAltLoc() != null && a2.getAltLoc()!=null && 217 a1.getAltLoc()!=' ' && a2.getAltLoc()!=' ' && 218 a1.getAltLoc() != a2.getAltLoc()) { 219 logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", 220 a1.toString(), a1.getAltLoc(), a2.toString(), a2.getAltLoc()); 221 continue; 222 } 223 if (maxAllowedLength<0) { 224 // negative maxAllowedLength means we don't check distance and always add bond 225 logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", 226 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder); 227 new BondImpl(a1, a2, bondOrder); 228 } else { 229 if (Calc.getDistance(a1, a2) < maxAllowedLength) { 230 logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}. Distance is below {}", 231 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); 232 new BondImpl(a1, a2, bondOrder); 233 } else { 234 logger.debug("Not forming bond between atoms {}-{} and {}-{} with bond order {}, because distance is above {}", 235 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); 236 } 237 } 238 } 239 } 240 } 241 242 /** 243 * Get all atoms (including possible alt locs) in given group that are name with the given atom name 244 * @param g the group 245 * @param name the atom name 246 * @return list of all atoms, or empty list if no atoms with the name 247 */ 248 private List<Atom> getAtoms(Group g, String name) { 249 List<Atom> atoms = new ArrayList<>(); 250 List<Group> groupsWithAltLocs = new ArrayList<>(); 251 groupsWithAltLocs.add(g); 252 groupsWithAltLocs.addAll(g.getAltLocs()); 253 for (Group group : groupsWithAltLocs) { 254 Atom a = group.getAtom(name); 255 // Check for deuteration 256 if (a==null && name.startsWith("H")) { 257 a = group.getAtom(name.replaceFirst("H", "D")); 258 // Check it is actually deuterated 259 if (a!=null && !a.getElement().equals(Element.D)){ 260 a=null; 261 } 262 } 263 if (a!=null) 264 atoms.add(a); 265 } 266 return atoms; 267 } 268 269 private void trimBondLists() { 270 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 271 for (Chain chain : structure.getChains(modelInd)) { 272 for (Group group : chain.getAtomGroups()) { 273 for (Atom atom : group.getAtoms()) { 274 if (atom.getBonds()!=null && atom.getBonds().size() > 0) { 275 ((ArrayList<Bond>) atom.getBonds()).trimToSize(); 276 } 277 } 278 } 279 } 280 } 281 } 282 283 /** 284 * Creates disulfide bond objects and references in the corresponding Atoms objects, given 285 * a list of {@link SSBondImpl}s parsed from a PDB file. 286 * @param disulfideBonds 287 */ 288 public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) { 289 for (SSBondImpl disulfideBond : disulfideBonds) { 290 formDisulfideBond(disulfideBond); 291 } 292 } 293 294 private void formDisulfideBond(SSBondImpl disulfideBond) { 295 try { 296 // The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids, 297 // thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms 298 // See issue https://github.com/biojava/biojava/issues/929 299 String polyChainId1 = structure.getPolyChainByPDB(disulfideBond.getChainID1()).getId(); 300 String polyChainId2 = structure.getPolyChainByPDB(disulfideBond.getChainID2()).getId(); 301 List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID1()); 302 List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID2()); 303 304 List<String> allChainIds1 = new ArrayList<>(); 305 List<String> allChainIds2 = new ArrayList<>(); 306 if (polyChainId1!=null) allChainIds1.add(polyChainId1); 307 if (polyChainId2!=null) allChainIds2.add(polyChainId2); 308 if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId())); 309 if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId())); 310 311 Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum1(), disulfideBond.getInsCode1(), allChainIds1); 312 313 Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum2(), disulfideBond.getInsCode2(), allChainIds2); 314 315 for(int i=0; i<structure.nrModels(); i++){ 316 if(a.containsKey(i) && b.containsKey(i)){ 317 // TODO determine what the actual bond order of this bond is; for 318 // now, we're assuming they're single bonds 319 if(!a.get(i).equals(b.get(i))){ 320 Bond ssbond = new BondImpl(a.get(i), b.get(i), 1); 321 structure.addSSBond(ssbond); 322 } 323 } 324 } 325 326 327 } catch (StructureException e) { 328 // Note, in Calpha only mode the CYS SG's are not present. 329 if (! params.isParseCAOnly()) { 330 logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString()); 331 } else { 332 logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode."); 333 } 334 } 335 } 336 337 /** 338 * Creates bond objects from a LinkRecord as parsed from a PDB file 339 * @param linkRecord the PDB-format LINK record 340 */ 341 public void formLinkRecordBond(LinkRecord linkRecord) { 342 // only work with atoms that aren't alternate locations 343 if (linkRecord.getAltLoc1().equals(" ") 344 || linkRecord.getAltLoc2().equals(" ")) 345 return; 346 347 try { 348 // The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids, 349 // thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms 350 // See issue https://github.com/biojava/biojava/issues/943 351 String polyChainId1 = structure.getPolyChainByPDB(linkRecord.getChainID1()).getId(); 352 String polyChainId2 = structure.getPolyChainByPDB(linkRecord.getChainID2()).getId(); 353 List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(linkRecord.getChainID1()); 354 List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(linkRecord.getChainID2()); 355 Chain waterChain1 = structure.getWaterChainByPDB(linkRecord.getChainID1()); 356 Chain waterChain2 = structure.getWaterChainByPDB(linkRecord.getChainID2()); 357 358 List<String> allChainIds1 = new ArrayList<>(); 359 List<String> allChainIds2 = new ArrayList<>(); 360 if (polyChainId1!=null) allChainIds1.add(polyChainId1); 361 if (polyChainId2!=null) allChainIds2.add(polyChainId2); 362 if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId())); 363 if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId())); 364 if (waterChain1!=null && linkRecord.getResName1().equals("HOH")) allChainIds1.add(waterChain1.getId()); 365 if (waterChain2!=null && linkRecord.getResName2().equals("HOH")) allChainIds2.add(waterChain2.getId()); 366 367 Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds(linkRecord.getName1(), linkRecord.getAltLoc1(), linkRecord.getResSeq1(), linkRecord.getiCode1(), allChainIds1); 368 369 Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds(linkRecord.getName2(), linkRecord.getAltLoc2(), linkRecord.getResSeq2(), linkRecord.getiCode2(), allChainIds2); 370 371 for(int i=0; i<structure.nrModels(); i++){ 372 if(a.containsKey(i) && b.containsKey(i)){ 373 // TODO determine what the actual bond order of this bond is; for 374 // now, we're assuming they're single bonds 375 if(!a.get(i).equals(b.get(i))){ 376 new BondImpl(a.get(i), b.get(i), 1); 377 } 378 } 379 } 380 } catch (StructureException e) { 381 // Note, in Calpha only mode the link atoms may not be present. 382 if (! params.isParseCAOnly()) { 383 logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString()); 384 } else { 385 logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode."); 386 } 387 388 } 389 } 390 391 private Map<Integer, Atom> getAtomFromRecordTryMultipleChainIds(String name, String altLoc, String resSeq, String iCode, List<String> chainIds) throws StructureException { 392 Map<Integer, Atom> a = null; 393 for (String chainId : chainIds) { 394 try { 395 a = getAtomFromRecord(name, altLoc, chainId, resSeq, iCode); 396 // first instance that doesn't give an exception will be considered the right one. Not much more we can do here 397 break; 398 } catch (StructureException e) { 399 logger.debug("Tried to get atom {} {} {} (alt loc {}) from chain id {}, but did not find it", name, resSeq, iCode, altLoc, chainId); 400 } 401 } 402 if (a == null) { 403 throw new StructureException("Could not find atom "+name+" "+resSeq+" "+iCode+" (alt loc "+altLoc+")"); 404 } 405 return a; 406 } 407 408 409 public void formBondsFromStructConn(StructConn conn) { 410 final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit. 411 List<Bond> ssbonds = new ArrayList<>(); 412 413 for (int i = 0; i < conn.getRowCount(); i++) { 414 if (!BOND_TYPES_TO_PARSE.contains(conn.getConnTypeId().get(i))) continue; 415 String chainId1; 416 String chainId2; 417 418 chainId1 = conn.getPtnr1LabelAsymId().get(i); 419 chainId2 = conn.getPtnr2LabelAsymId().get(i); 420 421 String insCode1 = ""; 422 if (conn.getPdbxPtnr1PDBInsCode().getValueKind(i) == ValueKind.PRESENT) { 423 insCode1 = conn.getPdbxPtnr1PDBInsCode().get(i); 424 } 425 String insCode2 = ""; 426 if (conn.getPdbxPtnr2PDBInsCode().getValueKind(i) == ValueKind.PRESENT) { 427 insCode2 = conn.getPdbxPtnr2PDBInsCode().get(i); 428 } 429 430 String seqId1 = conn.getPtnr1AuthSeqId().getStringData(i); 431 String seqId2 = conn.getPtnr2AuthSeqId().getStringData(i); 432 String resName1 = conn.getPtnr1LabelCompId().get(i); 433 String resName2 = conn.getPtnr2LabelCompId().get(i); 434 String atomName1 = conn.getPtnr1LabelAtomId().get(i); 435 String atomName2 = conn.getPtnr2LabelAtomId().get(i); 436 String altLoc1 = ""; 437 if (conn.getPdbxPtnr1LabelAltId().getValueKind(i) == ValueKind.PRESENT) { 438 altLoc1 = conn.getPdbxPtnr1LabelAltId().get(i); 439 } 440 String altLoc2 = ""; 441 if (conn.getPdbxPtnr2LabelAltId().getValueKind(i) == ValueKind.PRESENT) { 442 altLoc2 = conn.getPdbxPtnr2LabelAltId().get(i); 443 } 444 445 // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules. 446 if (!conn.getPtnr1Symmetry().get(i).equals(symop) || !conn.getPtnr2Symmetry().get(i).equals(symop) ) { 447 logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet", 448 atomName1, seqId1, insCode1, atomName2, seqId2, insCode2); 449 continue; 450 } 451 452 453 String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")"; 454 String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")"; 455 456 Map<Integer,Atom> a1 = null; 457 Map<Integer,Atom> a2 = null; 458 459 try { 460 a1 = getAtomFromRecord(atomName1, altLoc1, chainId1, seqId1, insCode1); 461 462 } catch (StructureException e) { 463 464 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1); 465 continue; 466 } 467 try { 468 a2 = getAtomFromRecord(atomName2, altLoc2, chainId2, seqId2, insCode2); 469 } catch (StructureException e) { 470 471 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2); 472 continue; 473 } 474 475 if (a1==null) { 476 // we couldn't find the atom, something must be wrong with the file 477 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1); 478 continue; 479 } 480 if (a2==null) { 481 // we couldn't find the atom, something must be wrong with the file 482 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2); 483 continue; 484 } 485 486 // assuming order 1 for all bonds, no information is provided by struct_conn 487 for (int j = 0; j < structure.nrModels(); j++) { 488 Bond bond = null; 489 if (a1.containsKey(j) && a2.containsKey(j)) { 490 if (!a1.get(j).equals(a2.get(j))) { 491 bond = new BondImpl(a1.get(j), a2.get(j), 1); 492 } 493 } 494 if (bond != null) { 495 if (conn.getConnTypeId().get(i).equals("disulf")) { 496 ssbonds.add(bond); 497 } 498 } 499 } 500 } 501 502 // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds 503 structure.setSSBonds(ssbonds); 504 } 505 506 private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String chainID, String resSeq, String iCode) 507 throws StructureException { 508 509 if (iCode==null || iCode.isEmpty()) { 510 iCode = " "; // an insertion code of ' ' is ignored 511 } 512 Map<Integer, Atom> outMap = new HashMap<>(); 513 ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0)); 514 515 for (int i=0; i<structure.nrModels(); i++){ 516 Chain chain = structure.getChain(chainID,i); 517 Group group = chain.getGroupByPDB(resNum); 518 519 Group g = group; 520 // there is an alternate location 521 if (!altLoc.isEmpty()) { 522 g = group.getAltLocGroup(altLoc.charAt(0)); 523 if (g==null) 524 throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID); 525 } 526 Atom a = g.getAtom(name); 527 if (a!=null){ 528 outMap.put(i,a); 529 } 530 } 531 return outMap; 532 } 533}