001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Mar. 6, 2014
021 *
022 */
023package org.biojava.nbio.structure.io;
024
025import org.biojava.nbio.structure.*;
026import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
027import org.biojava.nbio.structure.io.mmcif.ChemCompProvider;
028import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
029import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
030import org.biojava.nbio.structure.io.mmcif.model.StructConn;
031import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import java.util.ArrayList;
036import java.util.HashMap;
037import java.util.HashSet;
038import java.util.List;
039import java.util.Map;
040import java.util.Set;
041
042/**
043 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and
044 * intra-group (residue) bonds based on data from the Chemical Component Dictionary
045 * to the Structure object.
046 *
047 * TODO the current implementation adds bonds to the first model only. This
048 * should be sufficient for homogeneous models, but here are a few inhomogeneous models
049 * in the PDB. A better handling of models should be considered in the future.
050 *
051 * @author Peter Rose
052 * @author Ulysse Carion
053 *
054 */
055public class BondMaker {
056
057
058        private static final Logger logger = LoggerFactory.getLogger(BondMaker.class);
059
060        /**
061         * The types of bonds that are read from struct_conn (type specified in field conn_type_id)
062         */
063        public static final Set<String> BOND_TYPES_TO_PARSE;
064        static {
065                BOND_TYPES_TO_PARSE = new HashSet<>();
066                BOND_TYPES_TO_PARSE.add("disulf");
067                BOND_TYPES_TO_PARSE.add("covale");
068                BOND_TYPES_TO_PARSE.add("covale_base");
069                BOND_TYPES_TO_PARSE.add("covale_phosphate");
070                BOND_TYPES_TO_PARSE.add("covale_sugar");
071                BOND_TYPES_TO_PARSE.add("modres");
072        }
073
074
075        /**
076         * Maximum peptide (C - N) bond length considered for bond formation
077         */
078        private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8;
079        /**
080         * Maximum nucleotide (P - O3') bond length considered for bond formation
081         */
082        private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1;
083
084        private Structure structure;
085        private FileParsingParameters params;
086
087        public BondMaker(Structure structure, FileParsingParameters params) {
088                this.structure = structure;
089                this.params = params;
090        }
091
092        /**
093         * Creates bond objects and corresponding references in Atom objects:
094         * <li>
095         * peptide bonds: inferred from sequence and distances
096         * </li>
097         * <li>
098         * nucleotide bonds: inferred from sequence and distances
099         * </li>
100         * <li>
101         * intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider}
102         * </li>
103         */
104        public void makeBonds() {
105                logger.debug("Going to start making bonds");
106                formPeptideBonds();
107                formNucleotideBonds();
108                formIntraResidueBonds();
109                trimBondLists();
110        }
111
112        private void formPeptideBonds() {
113                for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){
114                        for (Chain chain : structure.getChains(modelInd)) {
115                                List<Group> groups = chain.getSeqResGroups();
116
117                                for (int i = 0; i < groups.size() - 1; i++) {
118                                        if (!(groups.get(i) instanceof AminoAcidImpl)
119                                                        || !(groups.get(i + 1) instanceof AminoAcidImpl))
120                                                continue;
121
122                                        AminoAcidImpl tail = (AminoAcidImpl) groups.get(i);
123                                        AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1);
124
125                                        // atoms with no residue number don't have atom information
126                                        if (tail.getResidueNumber() == null
127                                                        || head.getResidueNumber() == null) {
128                                                continue;
129                                        }
130
131                                        Atom carboxylC;
132                                        Atom aminoN;
133
134                                        carboxylC = tail.getC();
135                                        aminoN = head.getN();
136
137
138                                        if (carboxylC == null || aminoN == null) {
139                                                // some structures may be incomplete and not store info
140                                                // about all of their atoms
141
142                                                continue;
143                                        }
144
145
146                                        if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) {
147                                                new BondImpl(carboxylC, aminoN, 1);
148                                        }
149
150                                }
151                        }
152                }
153        }
154
155        private void formNucleotideBonds() {
156                for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){
157                        for (Chain chain : structure.getChains(modelInd)) {
158                                List<Group> groups = chain.getSeqResGroups();
159
160                                for (int i = 0; i < groups.size() - 1; i++) {
161                                        if (!(groups.get(i) instanceof NucleotideImpl)
162                                                        || !(groups.get(i + 1) instanceof NucleotideImpl))
163                                                continue;
164
165                                        NucleotideImpl tail = (NucleotideImpl) groups.get(i);
166                                        NucleotideImpl head = (NucleotideImpl) groups.get(i + 1);
167
168                                        // atoms with no residue number don't have atom information
169                                        if (tail.getResidueNumber() == null
170                                                        || head.getResidueNumber() == null) {
171                                                continue;
172                                        }
173
174                                        Atom phosphorous = head.getP();
175                                        Atom oThreePrime = tail.getO3Prime();
176
177                                        if (phosphorous == null || oThreePrime == null) {
178                                                continue;
179                                        }
180
181
182                                        if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) {
183                                                new BondImpl(phosphorous, oThreePrime, 1);
184                                        }
185
186                                }
187                        }
188                }
189        }
190
191        private void formIntraResidueBonds() {
192                for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){
193                        for (Chain chain : structure.getChains(modelInd)) {
194                                List<Group> groups = chain.getAtomGroups();
195                                for (Group mainGroup : groups) {
196                                        // atoms with no residue number don't have atom information
197                                        if (mainGroup.getResidueNumber() == null) {
198                                                continue;
199                                        }
200                                        // Now add support for altLocGroup
201                                        List<Group> totList = new ArrayList<Group>();
202                                        totList.add(mainGroup);
203                                        for(Group altLoc: mainGroup.getAltLocs()){
204                                                totList.add(altLoc);
205                                        }
206
207
208                                        // Now iterate through this list
209                                        for(Group group : totList){
210
211                                                ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group.getPDBName());
212                                                logger.debug("chemcomp for residue {}-{} has {} atoms and {} bonds",
213                                                                group.getPDBName(), group.getResidueNumber(), aminoChemComp.getAtoms().size(), aminoChemComp.getBonds().size());
214
215                                                for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) {
216                                                        Atom a = getAtom(chemCompBond.getAtom_id_1(), group);
217                                                        Atom b = getAtom(chemCompBond.getAtom_id_2(), group);
218                                                        if ( a != null && b != null){
219                                                                int bondOrder = chemCompBond.getNumericalBondOrder();
220                                                                logger.debug("Forming bond between atoms {}-{} and {}-{} with bond order {}",
221                                                                                a.getPDBserial(), a.getName(), b.getPDBserial(), b.getName(), bondOrder);
222                                                                new BondImpl(a, b, bondOrder);
223                                                        }
224                                                        else{
225                                                                // Some of the atoms were missing. That's fine, there's
226                                                                // nothing to do in this case.
227                                                        }
228                                                }
229                                        }
230                                }
231                        }
232
233                }
234        }
235
236        private Atom getAtom(String atomId, Group group) {
237                Atom a = group.getAtom(atomId);
238                // Check for deuteration
239                if(a==null && atomId.startsWith("H")) {
240                        a = group.getAtom(atomId.replaceFirst("H", "D"));
241                        // Check it is actually deuterated
242                        if(a!=null){
243                                if(!a.getElement().equals(Element.D)){
244                                        a=null;
245                                }
246                        }
247                }
248                return a;
249        }
250
251        private void trimBondLists() {
252                for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){
253                        for (Chain chain : structure.getChains(modelInd)) {
254                                for (Group group : chain.getAtomGroups()) {
255                                        for (Atom atom : group.getAtoms()) {
256                                                if (atom.getBonds()!=null && atom.getBonds().size() > 0) {
257                                                        ((ArrayList<Bond>) atom.getBonds()).trimToSize();
258                                                }
259                                        }
260                                }
261                        }
262                }
263        }
264
265        /**
266         * Creates disulfide bond objects and references in the corresponding Atoms objects, given
267         * a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file.
268         * @param disulfideBonds
269         */
270        public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) {
271                for (SSBondImpl disulfideBond : disulfideBonds) {
272                        formDisulfideBond(disulfideBond);
273                }
274        }
275
276        private void formDisulfideBond(SSBondImpl disulfideBond) {
277                try {
278                        Map<Integer, Atom> a = getAtomFromRecord("SG", "", "CYS",
279                                        disulfideBond.getChainID1(), disulfideBond.getResnum1(),
280                                        disulfideBond.getInsCode1());
281                        Map<Integer, Atom> b = getAtomFromRecord("SG", "", "CYS",
282                                        disulfideBond.getChainID2(), disulfideBond.getResnum2(),
283                                        disulfideBond.getInsCode2());
284
285                        for(int i=0; i<structure.nrModels(); i++){
286                                if(a.containsKey(i) && b.containsKey(i)){
287                                        // TODO determine what the actual bond order of this bond is; for
288                                        // now, we're assuming they're single bonds
289                                        if(!a.get(i).equals(b.get(i))){
290                                                Bond ssbond =  new BondImpl(a.get(i), b.get(i), 1);
291                                                structure.addSSBond(ssbond);
292                                        }
293                                }
294                        }
295
296
297                } catch (StructureException e) {
298                        // Note, in Calpha only mode the CYS SG's are not present.
299                        if (! params.isParseCAOnly()) {
300                                logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString());
301                        } else {
302                                logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode.");
303                        }
304                }
305        }
306
307        /**
308         * Creates bond objects from a LinkRecord as parsed from a PDB file
309         * @param linkRecord
310         */
311        public void formLinkRecordBond(LinkRecord linkRecord) {
312                // only work with atoms that aren't alternate locations
313                if (linkRecord.getAltLoc1().equals(" ")
314                                || linkRecord.getAltLoc2().equals(" "))
315                        return;
316
317                try {
318                        Map<Integer, Atom> a = getAtomFromRecord(linkRecord.getName1(),
319                                        linkRecord.getAltLoc1(), linkRecord.getResName1(),
320                                        linkRecord.getChainID1(), linkRecord.getResSeq1(),
321                                        linkRecord.getiCode1());
322
323                        Map<Integer, Atom> b = getAtomFromRecord(linkRecord.getName2(),
324                                        linkRecord.getAltLoc2(), linkRecord.getResName2(),
325                                        linkRecord.getChainID2(), linkRecord.getResSeq2(),
326                                        linkRecord.getiCode2());
327
328                        for(int i=0; i<structure.nrModels(); i++){
329                                if(a.containsKey(i) && b.containsKey(i)){
330                                        // TODO determine what the actual bond order of this bond is; for
331                                        // now, we're assuming they're single bonds
332                                        if(!a.get(i).equals(b.get(i))){
333                                                new BondImpl(a.get(i), b.get(i), 1);
334                                        }
335                                }
336                        }
337                }catch (StructureException e) {
338                        // Note, in Calpha only mode the link atoms may not be present.
339                        if (! params.isParseCAOnly()) {
340                                logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString());
341                        } else {
342                                logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode.");
343                        }
344
345                }
346        }
347
348
349        public void formBondsFromStructConn(List<StructConn> structConn) {
350
351                final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit.
352
353                List<Bond> ssbonds = new ArrayList<>();
354
355                for (StructConn conn : structConn) {
356
357                        if (!BOND_TYPES_TO_PARSE.contains(conn.getConn_type_id())) continue;
358                        String chainId1;
359                        String chainId2;
360
361                        chainId1 = conn.getPtnr1_label_asym_id();
362                        chainId2 = conn.getPtnr2_label_asym_id();
363
364                        String insCode1 = "";
365                        if (conn.getPdbx_ptnr1_PDB_ins_code() != null &&
366                                !conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code();
367                        String insCode2 = "";
368                        if (conn.getPdbx_ptnr2_PDB_ins_code() != null &&
369                                !conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code();
370
371                        String seqId1 = conn.getPtnr1_auth_seq_id();
372                        String seqId2 = conn.getPtnr2_auth_seq_id();
373                        String resName1 = conn.getPtnr1_label_comp_id();
374                        String resName2 = conn.getPtnr2_label_comp_id();
375                        String atomName1 = conn.getPtnr1_label_atom_id();
376                        String atomName2 = conn.getPtnr2_label_atom_id();
377                        String altLoc1 = "";
378                        if (!conn.getPdbx_ptnr1_label_alt_id().equals("?")) altLoc1 = conn.getPdbx_ptnr1_label_alt_id();
379                        String altLoc2 = "";
380                        if (!conn.getPdbx_ptnr2_label_alt_id().equals("?")) altLoc2 = conn.getPdbx_ptnr2_label_alt_id();
381
382                        // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules.
383                        if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) {
384                                logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet",
385                                                atomName1, seqId1, insCode1, atomName2, seqId2, insCode2);
386                                continue;
387                        }
388
389
390                        String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")";
391                        String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")";
392
393                        Map<Integer,Atom> a1 = null;
394                        Map<Integer,Atom> a2 = null;
395
396                        try {
397                                a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1);
398
399                        } catch (StructureException e) {
400
401                                logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1);
402                                continue;
403                        }
404                        try {
405                                a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2);
406                        } catch (StructureException e) {
407
408                                logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2);
409                                continue;
410                        }
411
412                        if (a1==null) {
413                                // we couldn't find the atom, something must be wrong with the file
414                                logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1);
415                                continue;
416                        }
417                        if (a2==null) {
418                                // we couldn't find the atom, something must be wrong with the file
419                                logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2);
420                                continue;
421                        }
422
423                        // assuming order 1 for all bonds, no information is provided by struct_conn
424                        for(int i=0; i<structure.nrModels(); i++){
425                                Bond bond = null;
426                                if(a1.containsKey(i) && a2.containsKey(i)){
427                                        if(!a1.get(i).equals(a2.get(i))){
428                                                bond = new BondImpl(a1.get(i), a2.get(i), 1);
429                                        }
430                                }
431                                if(bond!=null){
432                                        if (conn.getConn_type_id().equals("disulf")) {
433                                                ssbonds.add(bond);
434                                        }
435                                }
436                        }
437
438                }
439
440                // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds
441                structure.setSSBonds(ssbonds);
442        }
443
444        private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode)
445                        throws StructureException {
446
447                if (iCode==null || iCode.isEmpty()) {
448                        iCode = " "; // an insertion code of ' ' is ignored
449                }
450                Map<Integer, Atom> outMap = new HashMap<>();
451                ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0));
452
453                for (int i=0; i<structure.nrModels(); i++){
454                        Chain chain = structure.getChain(chainID,i);
455                        Group group = chain.getGroupByPDB(resNum);
456
457                        Group g = group;
458                        // there is an alternate location
459                        if (!altLoc.isEmpty()) {
460                                g = group.getAltLocGroup(altLoc.charAt(0));
461                                if (g==null)
462                                        throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID);
463                        }
464                        Atom a = g.getAtom(name);
465                        if (a!=null){
466                                outMap.put(i,a);
467                        }
468                }
469                return outMap;
470        }
471}