001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar. 6, 2014 021 * 022 */ 023package org.biojava.nbio.structure.io; 024 025import org.biojava.nbio.structure.*; 026import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; 027import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; 028import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 029import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond; 030import org.biojava.nbio.structure.io.mmcif.model.StructConn; 031import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.util.ArrayList; 036import java.util.HashSet; 037import java.util.List; 038import java.util.Set; 039 040/** 041 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and 042 * intra-group (residue) bonds based on data from the Chemical Component Dictionary 043 * to the Structure object. 044 * 045 * TODO the current implementation adds bonds to the first model only. This 046 * should be sufficient for homogeneous models, but here are a few inhomogeneous models 047 * in the PDB. A better handling of models should be considered in the future. 048 * 049 * @author Peter Rose 050 * @author Ulysse Carion 051 * 052 */ 053public class BondMaker { 054 055 056 private static final Logger logger = LoggerFactory.getLogger(BondMaker.class); 057 058 /** 059 * The types of bonds that are read from struct_conn (type specified in field conn_type_id) 060 */ 061 public static final Set<String> BOND_TYPES_TO_PARSE; 062 static { 063 BOND_TYPES_TO_PARSE = new HashSet<>(); 064 BOND_TYPES_TO_PARSE.add("disulf"); 065 BOND_TYPES_TO_PARSE.add("covale"); 066 BOND_TYPES_TO_PARSE.add("covale_base"); 067 BOND_TYPES_TO_PARSE.add("covale_phosphate"); 068 BOND_TYPES_TO_PARSE.add("covale_sugar"); 069 BOND_TYPES_TO_PARSE.add("modres"); 070 } 071 072 073 /** 074 * Maximum peptide (C - N) bond length considered for bond formation 075 */ 076 private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8; 077 /** 078 * Maximum nucleotide (P - O3') bond length considered for bond formation 079 */ 080 private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1; 081 082 private Structure structure; 083 private FileParsingParameters params; 084 085 public BondMaker(Structure structure, FileParsingParameters params) { 086 this.structure = structure; 087 this.params = params; 088 } 089 090 /** 091 * Creates bond objects and corresponding references in Atom objects: 092 * <li> 093 * peptide bonds: inferred from sequence and distances 094 * </li> 095 * <li> 096 * nucleotide bonds: inferred from sequence and distances 097 * </li> 098 * <li> 099 * intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider} 100 * </li> 101 */ 102 public void makeBonds() { 103 formPeptideBonds(); 104 formNucleotideBonds(); 105 formIntraResidueBonds(); 106 trimBondLists(); 107 } 108 109 private void formPeptideBonds() { 110 for (Chain chain : structure.getChains()) { 111 List<Group> groups = chain.getSeqResGroups(); 112 113 for (int i = 0; i < groups.size() - 1; i++) { 114 if (!(groups.get(i) instanceof AminoAcidImpl) 115 || !(groups.get(i + 1) instanceof AminoAcidImpl)) 116 continue; 117 118 AminoAcidImpl tail = (AminoAcidImpl) groups.get(i); 119 AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1); 120 121 // atoms with no residue number don't have atom information 122 if (tail.getResidueNumber() == null 123 || head.getResidueNumber() == null) { 124 continue; 125 } 126 127 Atom carboxylC; 128 Atom aminoN; 129 130 carboxylC = tail.getC(); 131 aminoN = head.getN(); 132 133 134 if (carboxylC == null || aminoN == null) { 135 // some structures may be incomplete and not store info 136 // about all of their atoms 137 138 continue; 139 } 140 141 142 if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { 143 new BondImpl(carboxylC, aminoN, 1); 144 } 145 146 } 147 } 148 } 149 150 private void formNucleotideBonds() { 151 for (Chain chain : structure.getChains()) { 152 List<Group> groups = chain.getSeqResGroups(); 153 154 for (int i = 0; i < groups.size() - 1; i++) { 155 if (!(groups.get(i) instanceof NucleotideImpl) 156 || !(groups.get(i + 1) instanceof NucleotideImpl)) 157 continue; 158 159 NucleotideImpl tail = (NucleotideImpl) groups.get(i); 160 NucleotideImpl head = (NucleotideImpl) groups.get(i + 1); 161 162 // atoms with no residue number don't have atom information 163 if (tail.getResidueNumber() == null 164 || head.getResidueNumber() == null) { 165 continue; 166 } 167 168 Atom phosphorous = head.getP(); 169 Atom oThreePrime = tail.getO3Prime(); 170 171 if (phosphorous == null || oThreePrime == null) { 172 continue; 173 } 174 175 176 if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) { 177 new BondImpl(phosphorous, oThreePrime, 1); 178 } 179 180 } 181 } 182 } 183 184 private void formIntraResidueBonds() { 185 for (Chain chain : structure.getChains()) { 186 List<Group> groups = chain.getAtomGroups(); 187 188 for (Group mainGroup : groups) { 189 // atoms with no residue number don't have atom information 190 if (mainGroup.getResidueNumber() == null) { 191 continue; 192 } 193 194 // Now add support for altLocGroup 195 List<Group> totList = new ArrayList<Group>(); 196 totList.add(mainGroup); 197 for(Group altLoc: mainGroup.getAltLocs()){ 198 totList.add(altLoc); 199 } 200 // Now iterate through this list 201 for(Group group : totList){ 202 203 ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group 204 .getPDBName()); 205 206 for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { 207 208 Atom a = group.getAtom(chemCompBond.getAtom_id_1()); 209 Atom b = group.getAtom(chemCompBond.getAtom_id_2()); 210 if ( a != null && b != null){ 211 212 int bondOrder = chemCompBond.getNumericalBondOrder(); 213 214 new BondImpl(a, b, bondOrder); 215 } else { 216 217 // Some of the atoms were missing. That's fine, there's 218 // nothing to do in this case. 219 } 220 } 221 } 222 } 223 } 224 } 225 226 227 private void trimBondLists() { 228 for (Chain chain : structure.getChains()) { 229 for (Group group : chain.getAtomGroups()) { 230 for (Atom atom : group.getAtoms()) { 231 if (atom.getBonds()!=null && atom.getBonds().size() > 0) { 232 ((ArrayList<Bond>) atom.getBonds()).trimToSize(); 233 } 234 } 235 } 236 } 237 } 238 239 /** 240 * Creates disulfide bond objects and references in the corresponding Atoms objects, given 241 * a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file. 242 * @param disulfideBonds 243 */ 244 public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) { 245 List<Bond> bonds = new ArrayList<>(); 246 for (SSBondImpl disulfideBond : disulfideBonds) { 247 Bond bond = formDisulfideBond(disulfideBond); 248 if (bond!=null) bonds.add(bond); 249 } 250 structure.setSSBonds(bonds); 251 } 252 253 private Bond formDisulfideBond(SSBondImpl disulfideBond) { 254 try { 255 Atom a = getAtomFromRecord("SG", "", "CYS", 256 disulfideBond.getChainID1(), disulfideBond.getResnum1(), 257 disulfideBond.getInsCode1()); 258 Atom b = getAtomFromRecord("SG", "", "CYS", 259 disulfideBond.getChainID2(), disulfideBond.getResnum2(), 260 disulfideBond.getInsCode2()); 261 262 Bond ssbond = new BondImpl(a, b, 1); 263 264 structure.addSSBond(ssbond); 265 266 return ssbond; 267 268 } catch (StructureException e) { 269 // Note, in Calpha only mode the CYS SG's are not present. 270 if (! params.isParseCAOnly()) { 271 logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString()); 272 } else { 273 logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode."); 274 } 275 276 return null; 277 } 278 } 279 280 /** 281 * Creates bond objects from a LinkRecord as parsed from a PDB file 282 * @param linkRecord 283 */ 284 public void formLinkRecordBond(LinkRecord linkRecord) { 285 // only work with atoms that aren't alternate locations 286 if (linkRecord.getAltLoc1().equals(" ") 287 || linkRecord.getAltLoc2().equals(" ")) 288 return; 289 290 try { 291 Atom a = getAtomFromRecord(linkRecord.getName1(), 292 linkRecord.getAltLoc1(), linkRecord.getResName1(), 293 linkRecord.getChainID1(), linkRecord.getResSeq1(), 294 linkRecord.getiCode1()); 295 296 Atom b = getAtomFromRecord(linkRecord.getName2(), 297 linkRecord.getAltLoc2(), linkRecord.getResName2(), 298 linkRecord.getChainID2(), linkRecord.getResSeq2(), 299 linkRecord.getiCode2()); 300 301 // TODO determine what the actual bond order of this bond is; for 302 // now, we're assuming they're single bonds 303 new BondImpl(a, b, 1); 304 } catch (StructureException e) { 305 // Note, in Calpha only mode the link atoms may not be present. 306 if (! params.isParseCAOnly()) { 307 logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString()); 308 } else { 309 logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode."); 310 } 311 312 } 313 } 314 315 public void formBondsFromStructConn(List<StructConn> structConn) { 316 317 final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit. 318 319 List<Bond> ssbonds = new ArrayList<>(); 320 321 for (StructConn conn : structConn) { 322 323 if (!BOND_TYPES_TO_PARSE.contains(conn.getConn_type_id())) continue; 324 String chainId1; 325 String chainId2; 326 if(params.isUseInternalChainId()){ 327 chainId1 = conn.getPtnr1_label_asym_id(); 328 chainId2 = conn.getPtnr2_label_asym_id(); 329 } 330 else{ 331 chainId1 = conn.getPtnr1_auth_asym_id(); 332 chainId2 = conn.getPtnr2_auth_asym_id(); 333 } 334 String insCode1 = ""; 335 if (!conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code(); 336 String insCode2 = ""; 337 if (!conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code(); 338 339 String seqId1 = conn.getPtnr1_auth_seq_id(); 340 String seqId2 = conn.getPtnr2_auth_seq_id(); 341 String resName1 = conn.getPtnr1_label_comp_id(); 342 String resName2 = conn.getPtnr2_label_comp_id(); 343 String atomName1 = conn.getPtnr1_label_atom_id(); 344 String atomName2 = conn.getPtnr2_label_atom_id(); 345 String altLoc1 = ""; 346 if (!conn.getPdbx_ptnr1_label_alt_id().equals("?")) altLoc1 = conn.getPdbx_ptnr1_label_alt_id(); 347 String altLoc2 = ""; 348 if (!conn.getPdbx_ptnr2_label_alt_id().equals("?")) altLoc2 = conn.getPdbx_ptnr2_label_alt_id(); 349 350 // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules. 351 if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) { 352 logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet", 353 atomName1, seqId1, insCode1, atomName2, seqId2, insCode2); 354 continue; 355 } 356 357 358 String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")"; 359 String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")"; 360 361 Atom a1 = null; 362 Atom a2 = null; 363 364 try { 365 a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1); 366 367 } catch (StructureException e) { 368 369 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1); 370 continue; 371 } 372 try { 373 a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2); 374 } catch (StructureException e) { 375 376 logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2); 377 continue; 378 } 379 380 if (a1==null) { 381 // we couldn't find the atom, something must be wrong with the file 382 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1); 383 continue; 384 } 385 if (a2==null) { 386 // we couldn't find the atom, something must be wrong with the file 387 logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2); 388 continue; 389 } 390 391 // assuming order 1 for all bonds, no information is provided by struct_conn 392 Bond bond = new BondImpl(a1, a2, 1); 393 394 if (conn.getConn_type_id().equals("disulf")) { 395 ssbonds.add(bond); 396 } 397 398 } 399 400 // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds 401 structure.setSSBonds(ssbonds); 402 } 403 404 private Atom getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode) 405 throws StructureException { 406 407 if (iCode==null || iCode.isEmpty()) { 408 iCode = " "; // an insertion code of ' ' is ignored 409 } 410 Chain chain = structure.getChainByPDB(chainID); 411 ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0)); 412 Group group = chain.getGroupByPDB(resNum); 413 414 Group g = group; 415 // there is an alternate location 416 if (!altLoc.isEmpty()) { 417 g = group.getAltLocGroup(altLoc.charAt(0)); 418 if (g==null) 419 throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID); 420 } 421 422 return g.getAtom(name); 423 } 424}