001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar. 6, 2014 021 * 022 */ 023package org.biojava.nbio.structure.io; 024 025import org.biojava.nbio.structure.*; 026import org.biojava.nbio.structure.chem.ChemComp; 027import org.biojava.nbio.structure.chem.ChemCompBond; 028import org.biojava.nbio.structure.chem.ChemCompGroupFactory; 029import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord; 030import org.rcsb.cif.model.ValueKind; 031import org.rcsb.cif.schema.mm.StructConn; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.util.ArrayList; 036import java.util.HashMap; 037import java.util.HashSet; 038import java.util.List; 039import java.util.Map; 040import java.util.Set; 041 042/** 043 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and 044 * intra-group (residue) bonds based on data from the Chemical Component Dictionary 045 * to the Structure object. 046 * 047 * TODO the current implementation adds bonds to the first model only. This 048 * should be sufficient for homogeneous models, but here are a few inhomogeneous models 049 * in the PDB. A better handling of models should be considered in the future. 050 * 051 * @author Peter Rose 052 * @author Ulysse Carion 053 * 054 */ 055public class BondMaker { 056 private static final Logger logger = LoggerFactory.getLogger(BondMaker.class); 057 058 /** 059 * The types of bonds that are read from struct_conn (type specified in field conn_type_id) 060 */ 061 public static final Set<String> BOND_TYPES_TO_PARSE; 062 static { 063 BOND_TYPES_TO_PARSE = new HashSet<>(); 064 BOND_TYPES_TO_PARSE.add("disulf"); 065 BOND_TYPES_TO_PARSE.add("covale"); 066 BOND_TYPES_TO_PARSE.add("covale_base"); 067 BOND_TYPES_TO_PARSE.add("covale_phosphate"); 068 BOND_TYPES_TO_PARSE.add("covale_sugar"); 069 BOND_TYPES_TO_PARSE.add("modres"); 070 } 071 072 073 /** 074 * Maximum peptide (C - N) bond length considered for bond formation 075 */ 076 private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8; 077 /** 078 * Maximum nucleotide (P - O3') bond length considered for bond formation 079 */ 080 private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1; 081 082 private final Structure structure; 083 private final FileParsingParameters params; 084 085 public BondMaker(Structure structure, FileParsingParameters params) { 086 this.structure = structure; 087 this.params = params; 088 } 089 090 /** 091 * Creates bond objects and corresponding references in Atom objects: 092 * <ul> 093 * <li> 094 * peptide bonds: inferred from sequence and distances 095 * </li> 096 * <li> 097 * nucleotide bonds: inferred from sequence and distances 098 * </li> 099 * <li> 100 * intra-group (residue) bonds: read from the chemical component dictionary, via {@link org.biojava.nbio.structure.chem.ChemCompProvider} 101 * </li> 102 * </ul> 103 */ 104 public void makeBonds() { 105 logger.debug("Going to start making bonds"); 106 formPeptideBonds(); 107 formNucleotideBonds(); 108 formIntraResidueBonds(); 109 trimBondLists(); 110 } 111 112 private void formPeptideBonds() { 113 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 114 for (Chain chain : structure.getChains(modelInd)) { 115 List<Group> groups = chain.getSeqResGroups(); 116 117 for (int i = 0; i < groups.size() - 1; i++) { 118 if (!(groups.get(i) instanceof AminoAcidImpl) 119 || !(groups.get(i + 1) instanceof AminoAcidImpl)) 120 continue; 121 122 AminoAcidImpl tail = (AminoAcidImpl) groups.get(i); 123 AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1); 124 125 // atoms with no residue number don't have atom information 126 if (tail.getResidueNumber() == null 127 || head.getResidueNumber() == null) { 128 continue; 129 } 130 131 formBondAltlocAware(tail, "C", head, "N", MAX_PEPTIDE_BOND_LENGTH, 1); 132 } 133 } 134 } 135 } 136 137 private void formNucleotideBonds() { 138 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 139 for (Chain chain : structure.getChains(modelInd)) { 140 List<Group> groups = chain.getSeqResGroups(); 141 142 for (int i = 0; i < groups.size() - 1; i++) { 143 if (!(groups.get(i) instanceof NucleotideImpl) 144 || !(groups.get(i + 1) instanceof NucleotideImpl)) 145 continue; 146 147 NucleotideImpl tail = (NucleotideImpl) groups.get(i); 148 NucleotideImpl head = (NucleotideImpl) groups.get(i + 1); 149 150 // atoms with no residue number don't have atom information 151 if (tail.getResidueNumber() == null 152 || head.getResidueNumber() == null) { 153 continue; 154 } 155 156 formBondAltlocAware(head, "P", tail, "O3'", MAX_NUCLEOTIDE_BOND_LENGTH, 1); 157 } 158 } 159 } 160 } 161 162 private void formIntraResidueBonds() { 163 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 164 for (Chain chain : structure.getChains(modelInd)) { 165 List<Group> groups = chain.getAtomGroups(); 166 for (Group mainGroup : groups) { 167 // atoms with no residue number don't have atom information 168 if (mainGroup.getResidueNumber() == null) { 169 continue; 170 } 171 // Now add support for altLocGroup 172 List<Group> totList = new ArrayList<>(); 173 totList.add(mainGroup); 174 totList.addAll(mainGroup.getAltLocs()); 175 176 // Now iterate through this list 177 for(Group group : totList){ 178 179 ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName()); 180 logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds", 181 group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size()); 182 183 for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { 184 // note we don't check distance to make this call not too expensive 185 formBondAltlocAware(group, chemCompBond.getAtomId1(), 186 group, chemCompBond.getAtomId2(), -1, chemCompBond.getNumericalBondOrder()); 187 } 188 } 189 } 190 } 191 192 } 193 } 194 195 /** 196 * Form bond between atoms of the given names and groups, respecting alt loc rules to form bonds: 197 * no bonds between differently named alt locs (that are not the default alt loc '.') 198 * and multiple bonds for default alt loc to named alt loc. 199 * @param g1 first group 200 * @param name1 name of atom in first group 201 * @param g2 second group 202 * @param name2 name of atom in second group 203 * @param maxAllowedLength max length, if atoms distance above this length no bond will be added. If negative no check on distance is performed. 204 * @param bondOrder the bond order to be set in the created bond(s) 205 */ 206 private void formBondAltlocAware(Group g1, String name1, Group g2, String name2, double maxAllowedLength, int bondOrder) { 207 List<Atom> a1s = getAtoms(g1, name1); 208 List<Atom> a2s = getAtoms(g2, name2); 209 210 if (a1s.isEmpty() || a2s.isEmpty()) { 211 // some structures may be incomplete and not store info 212 // about all of their atoms 213 return; 214 } 215 216 for (Atom a1:a1s) { 217 for (Atom a2:a2s) { 218 if (a1.getAltLoc() != null && a2.getAltLoc()!=null && 219 !a1.getAltLoc().equals(' ') && !a2.getAltLoc().equals(' ') && 220 !a1.getAltLoc().equals(a2.getAltLoc())) { 221 logger.debug("Skipping bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')", 222 a1.toString(), a1.getAltLoc(), a2.toString(), a2.getAltLoc()); 223 continue; 224 } 225 if (maxAllowedLength<0) { 226 // negative maxAllowedLength means we don't check distance and always add bond 227 logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}", 228 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder); 229 new BondImpl(a1, a2, bondOrder); 230 } else { 231 if (Calc.getDistance(a1, a2) < maxAllowedLength) { 232 logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}. Distance is below {}", 233 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); 234 new BondImpl(a1, a2, bondOrder); 235 } else { 236 logger.debug("Not forming bond between atoms {}-{} and {}-{} with bond order {}, because distance is above {}", 237 a1.getPDBserial(), a1.getName(), a2.getPDBserial(), a2.getName(), bondOrder, maxAllowedLength); 238 } 239 } 240 } 241 } 242 } 243 244 /** 245 * Get all atoms (including possible alt locs) in given group that are name with the given atom name 246 * @param g the group 247 * @param name the atom name 248 * @return list of all atoms, or empty list if no atoms with the name 249 */ 250 private List<Atom> getAtoms(Group g, String name) { 251 List<Atom> atoms = new ArrayList<>(); 252 List<Group> groupsWithAltLocs = new ArrayList<>(); 253 groupsWithAltLocs.add(g); 254 groupsWithAltLocs.addAll(g.getAltLocs()); 255 for (Group group : groupsWithAltLocs) { 256 Atom a = group.getAtom(name); 257 // Check for deuteration 258 if (a==null && name.startsWith("H")) { 259 a = group.getAtom(name.replaceFirst("H", "D")); 260 // Check it is actually deuterated 261 if (a!=null && !a.getElement().equals(Element.D)){ 262 a=null; 263 } 264 } 265 if (a!=null) 266 atoms.add(a); 267 } 268 return atoms; 269 } 270 271 private void trimBondLists() { 272 for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){ 273 for (Chain chain : structure.getChains(modelInd)) { 274 for (Group group : chain.getAtomGroups()) { 275 for (Atom atom : group.getAtoms()) { 276 if (atom.getBonds()!=null && atom.getBonds().size() > 0) { 277 ((ArrayList<Bond>) atom.getBonds()).trimToSize(); 278 } 279 } 280 } 281 } 282 } 283 } 284 285 /** 286 * Creates disulfide bond objects and references in the corresponding Atoms objects, given 287 * a list of {@link SSBondImpl}s parsed from a PDB file. 288 * @param disulfideBonds 289 */ 290 public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) { 291 for (SSBondImpl disulfideBond : disulfideBonds) { 292 formDisulfideBond(disulfideBond); 293 } 294 } 295 296 private void formDisulfideBond(SSBondImpl disulfideBond) { 297 try { 298 // The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids, 299 // thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms 300 // See issue https://github.com/biojava/biojava/issues/929 301 Chain polyChain1 = structure.getPolyChainByPDB(disulfideBond.getChainID1()); 302 Chain polyChain2 = structure.getPolyChainByPDB(disulfideBond.getChainID2()); 303 List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID1()); 304 List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID2()); 305 306 List<String> allChainIds1 = new ArrayList<>(); 307 List<String> allChainIds2 = new ArrayList<>(); 308 if (polyChain1!=null) allChainIds1.add(polyChain1.getId()); 309 if (polyChain2!=null) allChainIds2.add(polyChain2.getId()); 310 if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId())); 311 if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId())); 312 313 Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum1(), disulfideBond.getInsCode1(), allChainIds1); 314 315 Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum2(), disulfideBond.getInsCode2(), allChainIds2); 316 317 for(int i=0; i<structure.nrModels(); i++){ 318 if(a.containsKey(i) && b.containsKey(i)){ 319 // TODO determine what the actual bond order of this bond is; for 320 // now, we're assuming they're single bonds 321 if(!a.get(i).equals(b.get(i))){ 322 Bond ssbond = new BondImpl(a.get(i), b.get(i), 1); 323 structure.addSSBond(ssbond); 324 } 325 } 326 } 327 328 329 } catch (StructureException e) { 330 // Note, in Calpha only mode the CYS SG's are not present. 331 if (! params.isParseCAOnly()) { 332 logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString()); 333 } else { 334 logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode."); 335 } 336 } 337 } 338 339 /** 340 * Creates bond objects from a LinkRecord as parsed from a PDB file 341 * @param linkRecord the PDB-format LINK record 342 */ 343 public void formLinkRecordBond(LinkRecord linkRecord) { 344 // only work with atoms that aren't alternate locations 345 if (" ".equals(linkRecord.getAltLoc1()) 346 || " ".equals(linkRecord.getAltLoc2())) 347 return; 348 349 try { 350 // The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids, 351 // thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms 352 // See issue https://github.com/biojava/biojava/issues/943 353 Chain polyChain1 = structure.getPolyChainByPDB(linkRecord.getChainID1()); 354 Chain polyChain2 = structure.getPolyChainByPDB(linkRecord.getChainID2()); 355 List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(linkRecord.getChainID1()); 356 List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(linkRecord.getChainID2()); 357 Chain waterChain1 = structure.getWaterChainByPDB(linkRecord.getChainID1()); 358 Chain waterChain2 = structure.getWaterChainByPDB(linkRecord.getChainID2()); 359 360 List<String> allChainIds1 = new ArrayList<>(); 361 List<String> allChainIds2 = new ArrayList<>(); 362 if (polyChain1!=null) allChainIds1.add(polyChain1.getId()); 363 if (polyChain2!=null) allChainIds2.add(polyChain2.getId()); 364 if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId())); 365 if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId())); 366 if (waterChain1!=null && "HOH".equals(linkRecord.getResName1())) allChainIds1.add(waterChain1.getId()); 367 if (waterChain2!=null && "HOH".equals(linkRecord.getResName2())) allChainIds2.add(waterChain2.getId()); 368 369 Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds(linkRecord.getName1(), linkRecord.getAltLoc1(), linkRecord.getResSeq1(), linkRecord.getiCode1(), allChainIds1); 370 371 Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds(linkRecord.getName2(), linkRecord.getAltLoc2(), linkRecord.getResSeq2(), linkRecord.getiCode2(), allChainIds2); 372 373 for(int i=0; i<structure.nrModels(); i++){ 374 if(a.containsKey(i) && b.containsKey(i)){ 375 // TODO determine what the actual bond order of this bond is; for 376 // now, we're assuming they're single bonds 377 if(!a.get(i).equals(b.get(i))){ 378 new BondImpl(a.get(i), b.get(i), 1); 379 } 380 } 381 } 382 } catch (StructureException e) { 383 // Note, in Calpha only mode the link atoms may not be present. 384 if (! params.isParseCAOnly()) { 385 logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString()); 386 } else { 387 logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode."); 388 } 389 390 } 391 } 392 393 private Map<Integer, Atom> getAtomFromRecordTryMultipleChainIds(String name, String altLoc, String resSeq, String iCode, List<String> chainIds) throws StructureException { 394 Map<Integer, Atom> a = null; 395 for (String chainId : chainIds) { 396 try { 397 a = getAtomFromRecord(name, altLoc, chainId, resSeq, iCode); 398 // first instance that doesn't give an exception will be considered the right one. Not much more we can do here 399 break; 400 } catch (StructureException e) { 401 logger.debug("Tried to get atom {} {} {} (alt loc {}) from chain id {}, but did not find it", name, resSeq, iCode, altLoc, chainId); 402 } 403 } 404 if (a == null) { 405 throw new StructureException("Could not find atom "+name+" "+resSeq+" "+iCode+" (alt loc "+altLoc+")"); 406 } 407 return a; 408 } 409 410 411 public void formBondsFromStructConn(StructConn conn) { 412 final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit. 413 List<Bond> ssbonds = new ArrayList<>(); 414 415 for (int i = 0; i < conn.getRowCount(); i++) { 416 if (!BOND_TYPES_TO_PARSE.contains(conn.getConnTypeId().get(i))) continue; 417 String chainId1; 418 String chainId2; 419 420 chainId1 = conn.getPtnr1LabelAsymId().get(i); 421 chainId2 = conn.getPtnr2LabelAsymId().get(i); 422 423 String insCode1 = ""; 424 if (conn.getPdbxPtnr1PDBInsCode().getValueKind(i) == ValueKind.PRESENT) { 425 insCode1 = conn.getPdbxPtnr1PDBInsCode().get(i); 426 } 427 String insCode2 = ""; 428 if (conn.getPdbxPtnr2PDBInsCode().getValueKind(i) == ValueKind.PRESENT) { 429 insCode2 = conn.getPdbxPtnr2PDBInsCode().get(i); 430 } 431 432 String seqId1 = conn.getPtnr1AuthSeqId().getStringData(i); 433 String seqId2 = conn.getPtnr2AuthSeqId().getStringData(i); 434 String resName1 = conn.getPtnr1LabelCompId().get(i); 435 String resName2 = conn.getPtnr2LabelCompId().get(i); 436 String atomName1 = conn.getPtnr1LabelAtomId().get(i); 437 String atomName2 = conn.getPtnr2LabelAtomId().get(i); 438 String altLoc1 = ""; 439 if (conn.getPdbxPtnr1LabelAltId().getValueKind(i) == ValueKind.PRESENT) { 440 altLoc1 = conn.getPdbxPtnr1LabelAltId().get(i); 441 } 442 String altLoc2 = ""; 443 if (conn.getPdbxPtnr2LabelAltId().getValueKind(i) == ValueKind.PRESENT) { 444 altLoc2 = conn.getPdbxPtnr2LabelAltId().get(i); 445 } 446 447 // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules. 448 if (!conn.getPtnr1Symmetry().get(i).equals(symop) || !conn.getPtnr2Symmetry().get(i).equals(symop) ) { 449 logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet", 450 atomName1, seqId1, insCode1, atomName2, seqId2, insCode2); 451 continue; 452 } 453 454 455 String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")"; 456 String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")"; 457 458 Map<Integer,Atom> a1 = null; 459 Map<Integer,Atom> a2 = null; 460 461 try { 462 a1 = getAtomFromRecord(atomName1, altLoc1, chainId1, seqId1, insCode1); 463 464 } catch (StructureException e) { 465 466 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1); 467 continue; 468 } 469 try { 470 a2 = getAtomFromRecord(atomName2, altLoc2, chainId2, seqId2, insCode2); 471 } catch (StructureException e) { 472 473 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2); 474 continue; 475 } 476 477 if (a1==null) { 478 // we couldn't find the atom, something must be wrong with the file 479 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1); 480 continue; 481 } 482 if (a2==null) { 483 // we couldn't find the atom, something must be wrong with the file 484 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2); 485 continue; 486 } 487 488 // assuming order 1 for all bonds, no information is provided by struct_conn 489 for (int j = 0; j < structure.nrModels(); j++) { 490 Bond bond = null; 491 if (a1.containsKey(j) && a2.containsKey(j)) { 492 if (!a1.get(j).equals(a2.get(j))) { 493 bond = new BondImpl(a1.get(j), a2.get(j), 1); 494 } 495 } 496 if (bond != null) { 497 if (conn.getConnTypeId().get(i).equals("disulf")) { 498 ssbonds.add(bond); 499 } 500 } 501 } 502 } 503 504 // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds 505 structure.setSSBonds(ssbonds); 506 } 507 508 private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String chainID, String resSeq, String iCode) 509 throws StructureException { 510 511 if (iCode==null || iCode.isEmpty()) { 512 iCode = " "; // an insertion code of ' ' is ignored 513 } 514 Map<Integer, Atom> outMap = new HashMap<>(); 515 ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0)); 516 517 for (int i=0; i<structure.nrModels(); i++){ 518 Chain chain = structure.getChain(chainID,i); 519 Group group = chain.getGroupByPDB(resNum); 520 521 Group g = group; 522 // there is an alternate location 523 if (!altLoc.isEmpty()) { 524 g = group.getAltLocGroup(altLoc.charAt(0)); 525 if (g==null) 526 throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID); 527 } 528 Atom a = g.getAtom(name); 529 if (a!=null){ 530 outMap.put(i,a); 531 } 532 } 533 return outMap; 534 } 535}