001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Mar. 6, 2014
021 *
022 */
023package org.biojava.nbio.structure.io;
024
025import org.biojava.nbio.structure.*;
026import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
027import org.biojava.nbio.structure.io.mmcif.ChemCompProvider;
028import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
029import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
030import org.biojava.nbio.structure.io.mmcif.model.StructConn;
031import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import java.util.ArrayList;
036import java.util.HashSet;
037import java.util.List;
038import java.util.Set;
039
040/**
041 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and
042 * intra-group (residue) bonds based on data from the Chemical Component Dictionary
043 * to the Structure object.
044 *
045 * TODO the current implementation adds bonds to the first model only. This
046 * should be sufficient for homogeneous models, but here are a few inhomogeneous models
047 * in the PDB. A better handling of models should be considered in the future.
048 *
049 * @author Peter Rose
050 * @author Ulysse Carion
051 *
052 */
053public class BondMaker {
054
055
056        private static final Logger logger = LoggerFactory.getLogger(BondMaker.class);
057
058        /**
059         * The types of bonds that are read from struct_conn (type specified in field conn_type_id)
060         */
061        public static final Set<String> BOND_TYPES_TO_PARSE;
062        static {
063                BOND_TYPES_TO_PARSE = new HashSet<>();
064                BOND_TYPES_TO_PARSE.add("disulf");
065                BOND_TYPES_TO_PARSE.add("covale");
066                BOND_TYPES_TO_PARSE.add("covale_base");
067                BOND_TYPES_TO_PARSE.add("covale_phosphate");
068                BOND_TYPES_TO_PARSE.add("covale_sugar");
069                BOND_TYPES_TO_PARSE.add("modres");
070        }
071
072
073        /**
074         * Maximum peptide (C - N) bond length considered for bond formation
075         */
076        private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8;
077        /**
078         * Maximum nucleotide (P - O3') bond length considered for bond formation
079         */
080        private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1;
081
082        private Structure structure;
083        private FileParsingParameters params;
084
085        public BondMaker(Structure structure, FileParsingParameters params) {
086                this.structure = structure;
087                this.params = params;
088        }
089
090        /**
091         * Creates bond objects and corresponding references in Atom objects:
092         * <li>
093         * peptide bonds: inferred from sequence and distances
094         * </li>
095         * <li>
096         * nucleotide bonds: inferred from sequence and distances
097         * </li>
098         * <li>
099         * intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider}
100         * </li>
101         */
102        public void makeBonds() {
103                formPeptideBonds();
104                formNucleotideBonds();
105                formIntraResidueBonds();
106                trimBondLists();
107        }
108
109        private void formPeptideBonds() {
110                for (Chain chain : structure.getChains()) {
111                        List<Group> groups = chain.getSeqResGroups();
112
113                        for (int i = 0; i < groups.size() - 1; i++) {
114                                if (!(groups.get(i) instanceof AminoAcidImpl)
115                                                || !(groups.get(i + 1) instanceof AminoAcidImpl))
116                                        continue;
117
118                                AminoAcidImpl tail = (AminoAcidImpl) groups.get(i);
119                                AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1);
120
121                                // atoms with no residue number don't have atom information
122                                if (tail.getResidueNumber() == null
123                                                || head.getResidueNumber() == null) {
124                                        continue;
125                                }
126
127                                Atom carboxylC;
128                                Atom aminoN;
129
130                                carboxylC = tail.getC();
131                                aminoN = head.getN();
132
133
134                                if (carboxylC == null || aminoN == null) {
135                                        // some structures may be incomplete and not store info
136                                        // about all of their atoms
137
138                                        continue;
139                                }
140
141
142                                if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) {
143                                        new BondImpl(carboxylC, aminoN, 1);
144                                }
145
146                        }
147                }
148        }
149
150        private void formNucleotideBonds() {
151                for (Chain chain : structure.getChains()) {
152                        List<Group> groups = chain.getSeqResGroups();
153
154                        for (int i = 0; i < groups.size() - 1; i++) {
155                                if (!(groups.get(i) instanceof NucleotideImpl)
156                                                || !(groups.get(i + 1) instanceof NucleotideImpl))
157                                        continue;
158
159                                NucleotideImpl tail = (NucleotideImpl) groups.get(i);
160                                NucleotideImpl head = (NucleotideImpl) groups.get(i + 1);
161
162                                // atoms with no residue number don't have atom information
163                                if (tail.getResidueNumber() == null
164                                                || head.getResidueNumber() == null) {
165                                        continue;
166                                }
167
168                                Atom phosphorous = head.getP();
169                                Atom oThreePrime = tail.getO3Prime();
170
171                                if (phosphorous == null || oThreePrime == null) {
172                                        continue;
173                                }
174
175
176                                if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) {
177                                        new BondImpl(phosphorous, oThreePrime, 1);
178                                }
179
180                        }
181                }
182        }
183
184        private void formIntraResidueBonds() {
185                for (Chain chain : structure.getChains()) {
186                        List<Group> groups = chain.getAtomGroups();
187
188                        for (Group mainGroup : groups) {
189                                // atoms with no residue number don't have atom information
190                                if (mainGroup.getResidueNumber() == null) {
191                                        continue;
192                                }
193
194                                // Now add support for altLocGroup
195                                List<Group> totList = new ArrayList<Group>();
196                                totList.add(mainGroup);
197                                for(Group altLoc: mainGroup.getAltLocs()){
198                                        totList.add(altLoc);
199                                }
200                                // Now iterate through this list
201                                for(Group group : totList){
202
203                                        ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group
204                                                        .getPDBName());
205
206                                        for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) {
207
208                                                Atom a = group.getAtom(chemCompBond.getAtom_id_1());
209                                                Atom b = group.getAtom(chemCompBond.getAtom_id_2());
210                                                if ( a != null && b != null){
211
212                                                        int bondOrder = chemCompBond.getNumericalBondOrder();
213
214                                                        new BondImpl(a, b, bondOrder);
215                                                } else  {
216
217                                                        // Some of the atoms were missing. That's fine, there's
218                                                        // nothing to do in this case.
219                                                }
220                                        }
221                                }
222                        }
223                }
224        }
225
226
227        private void trimBondLists() {
228                for (Chain chain : structure.getChains()) {
229                        for (Group group : chain.getAtomGroups()) {
230                                for (Atom atom : group.getAtoms()) {
231                                        if (atom.getBonds()!=null && atom.getBonds().size() > 0) {
232                                                ((ArrayList<Bond>) atom.getBonds()).trimToSize();
233                                        }
234                                }
235                        }
236                }
237        }
238
239        /**
240         * Creates disulfide bond objects and references in the corresponding Atoms objects, given
241         * a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file.
242         * @param disulfideBonds
243         */
244        public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) {
245                List<Bond> bonds = new ArrayList<>();
246                for (SSBondImpl disulfideBond : disulfideBonds) {
247                        Bond bond = formDisulfideBond(disulfideBond);
248                        if (bond!=null) bonds.add(bond);
249                }
250                structure.setSSBonds(bonds);
251        }
252
253        private Bond formDisulfideBond(SSBondImpl disulfideBond) {
254                try {
255                        Atom a = getAtomFromRecord("SG", "", "CYS",
256                                        disulfideBond.getChainID1(), disulfideBond.getResnum1(),
257                                        disulfideBond.getInsCode1());
258                        Atom b = getAtomFromRecord("SG", "", "CYS",
259                                        disulfideBond.getChainID2(), disulfideBond.getResnum2(),
260                                        disulfideBond.getInsCode2());
261
262                        Bond ssbond = new BondImpl(a, b, 1);
263
264                        structure.addSSBond(ssbond);
265
266                        return ssbond;
267
268                } catch (StructureException e) {
269                        // Note, in Calpha only mode the CYS SG's are not present.
270                        if (! params.isParseCAOnly()) {
271                                logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString());
272                        } else {
273                                logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode.");
274                        }
275
276                        return null;
277                }
278        }
279
280        /**
281         * Creates bond objects from a LinkRecord as parsed from a PDB file
282         * @param linkRecord
283         */
284        public void formLinkRecordBond(LinkRecord linkRecord) {
285                // only work with atoms that aren't alternate locations
286                if (linkRecord.getAltLoc1().equals(" ")
287                                || linkRecord.getAltLoc2().equals(" "))
288                        return;
289
290                try {
291                        Atom a = getAtomFromRecord(linkRecord.getName1(),
292                                        linkRecord.getAltLoc1(), linkRecord.getResName1(),
293                                        linkRecord.getChainID1(), linkRecord.getResSeq1(),
294                                        linkRecord.getiCode1());
295
296                        Atom b = getAtomFromRecord(linkRecord.getName2(),
297                                        linkRecord.getAltLoc2(), linkRecord.getResName2(),
298                                        linkRecord.getChainID2(), linkRecord.getResSeq2(),
299                                        linkRecord.getiCode2());
300
301                        // TODO determine what the actual bond order of this bond is; for
302                        // now, we're assuming they're single bonds
303                        new BondImpl(a, b, 1);
304                } catch (StructureException e) {
305                        // Note, in Calpha only mode the link atoms may not be present.
306                        if (! params.isParseCAOnly()) {
307                                logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString());
308                        } else {
309                                logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode.");
310                        }
311
312                }
313        }
314
315        public void formBondsFromStructConn(List<StructConn> structConn) {
316
317                final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit.
318
319                List<Bond> ssbonds = new ArrayList<>();
320
321                for (StructConn conn : structConn) {
322
323                        if (!BOND_TYPES_TO_PARSE.contains(conn.getConn_type_id())) continue;
324                        String chainId1;
325                        String chainId2;
326                        if(params.isUseInternalChainId()){
327                                chainId1 = conn.getPtnr1_label_asym_id();
328                                chainId2 = conn.getPtnr2_label_asym_id();
329                        }
330                        else{
331                                chainId1 = conn.getPtnr1_auth_asym_id();
332                                chainId2 = conn.getPtnr2_auth_asym_id();
333                        }
334                        String insCode1 = "";
335                        if (!conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code();
336                        String insCode2 = "";
337                        if (!conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code();
338
339                        String seqId1 = conn.getPtnr1_auth_seq_id();
340                        String seqId2 = conn.getPtnr2_auth_seq_id();
341                        String resName1 = conn.getPtnr1_label_comp_id();
342                        String resName2 = conn.getPtnr2_label_comp_id();
343                        String atomName1 = conn.getPtnr1_label_atom_id();
344                        String atomName2 = conn.getPtnr2_label_atom_id();
345                        String altLoc1 = "";
346                        if (!conn.getPdbx_ptnr1_label_alt_id().equals("?")) altLoc1 = conn.getPdbx_ptnr1_label_alt_id();
347                        String altLoc2 = "";
348                        if (!conn.getPdbx_ptnr2_label_alt_id().equals("?")) altLoc2 = conn.getPdbx_ptnr2_label_alt_id();
349
350                        // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules.
351                        if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) {
352                                logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet",
353                                                atomName1, seqId1, insCode1, atomName2, seqId2, insCode2);
354                                continue;
355                        }
356
357
358                        String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")";
359                        String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")";
360
361                        Atom a1 = null;
362                        Atom a2 = null;
363
364                        try {
365                                a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1);
366
367                        } catch (StructureException e) {
368
369                                logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1);
370                                continue;
371                        }
372                        try {
373                                a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2);
374                        } catch (StructureException e) {
375
376                                logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2);
377                                continue;
378                        }
379
380                        if (a1==null) {
381                                // we couldn't find the atom, something must be wrong with the file
382                                logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1);
383                                continue;
384                        }
385                        if (a2==null) {
386                                // we couldn't find the atom, something must be wrong with the file
387                                logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2);
388                                continue;
389                        }
390
391                        // assuming order 1 for all bonds, no information is provided by struct_conn
392                        Bond bond = new BondImpl(a1, a2, 1);
393
394                        if (conn.getConn_type_id().equals("disulf")) {
395                                ssbonds.add(bond);
396                        }
397
398                }
399
400                // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds
401                structure.setSSBonds(ssbonds);
402        }
403
404        private Atom getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode)
405                        throws StructureException {
406
407                if (iCode==null || iCode.isEmpty()) {
408                        iCode = " "; // an insertion code of ' ' is ignored
409                }
410                Chain chain = structure.getChainByPDB(chainID);
411                ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0));
412                Group group = chain.getGroupByPDB(resNum);
413
414                Group g = group;
415                // there is an alternate location
416                if (!altLoc.isEmpty()) {
417                        g = group.getAltLocGroup(altLoc.charAt(0));
418                        if (g==null)
419                                throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID);
420                }
421
422                return g.getAtom(name);
423        }
424}