001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 *
021 */
022package org.biojava.nbio.structure.basepairs;
023
024import org.biojava.nbio.structure.Atom;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Group;
027import org.biojava.nbio.structure.Structure;
028import org.biojava.nbio.structure.contact.Pair;
029
030import javax.vecmath.Matrix4d;
031import java.util.ArrayList;
032import java.util.List;
033
034/**
035 * This class allows for finding inter-strand base pairs that are not necessarily canonical Watson-Crick pairs.
036 * The implementation of findPair is different than that of the base class.  This class does not consider intra-strand
037 * base pairing and for that, the TertiaryBasePairParameters class should be used.
038 * @author Luke Czapla
039 * @since 5.0.0
040 *
041 */
042public class MismatchedBasePairParameters extends BasePairParameters {
043
044        private static final long serialVersionUID = 2837124340169886674L;
045
046        public static final double DEFAULT_MAX_STAGGER = 2.0;
047        public static final double DEFAULT_MAX_PROPELLER = 60.0;
048        public static final double DEFAULT_MAX_SHEAR = 5.0;
049        public static final double DEFAULT_MAX_STRETCH = 5.0;
050
051        // These are the criteria used to select proper base pairs.
052        private double maxStagger = DEFAULT_MAX_STAGGER,
053                        maxShear = DEFAULT_MAX_SHEAR,
054                        maxStretch = DEFAULT_MAX_STRETCH,
055                        maxPropeller = DEFAULT_MAX_PROPELLER;
056
057        /**
058         * This constructor is used to create the TertiaryBasePairParameters object.  The parent constructors are valid
059         * as well, but for this class, it makes the most sense to specify the exact parameters for the analysis.
060         * @param structure The Structure to analyze
061         * @param RNA Whether to analyze RNA (if false, it will analyze DNA)
062         * @param removeDups Whether to remove duplicate sequences (useful for RCSB data with redundant units).
063         * @param canonical Whether to only consider canonical Watson-Crick base pairs.  If false, any pairing will be identified
064         *                  as long it falls below the maximum values of stagger, shear, and stretch.
065         */
066        public MismatchedBasePairParameters(Structure structure, boolean RNA, boolean removeDups, boolean canonical) {
067
068                super(structure, RNA, removeDups, canonical);
069
070        }
071
072        /**
073         * This is an implementation for finding non-canonical base pairs when there may be missing or overhanging bases.
074         * @param chains The list of chains already found to be nucleic acids.
075         * @return The list of the atom groups (residues) that are pairs, as a Pair of nucleic acid Groups.
076         */
077        @Override
078        public List<Pair<Group>> findPairs(List<Chain> chains) {
079                List<Pair<Group>> result = new ArrayList<>();
080                boolean lastFoundPair = false;
081                for (int i = 0; i < chains.size(); i++) {
082                        Chain c = chains.get(i);
083                        String sequence = c.getAtomSequence();
084                        for (int m = 0; m < sequence.length(); m++) {
085                                boolean foundPair = false;
086                                Integer type1, type2;
087                                for (int j = i + 1; j < chains.size() && !foundPair; j++) {
088                                        Chain c2 = chains.get(j);
089                                        if (j > i+1 && c.getAtomSequence().equals(c2.getAtomSequence()) && nonredundant) continue;
090                                        String sequence2 = c2.getAtomSequence();
091                                        for (int k = c2.getAtomSequence().length() - 1; k >= 0 && !foundPair; k--) {
092                                                if (canonical && !BasePairParameters.match(sequence.charAt(m), sequence2.charAt(k), useRNA)) continue;
093                                                Group g1 = c.getAtomGroup(m);
094                                                Group g2 = c2.getAtomGroup(k);
095                                                type1 = BASE_MAP.get(g1.getPDBName());
096                                                type2 = BASE_MAP.get(g2.getPDBName());
097                                                if (type1 == null || type2 == null) continue;
098                                                Atom a1 = g1.getAtom("C1'");
099                                                Atom a2 = g2.getAtom("C1'");
100                                                if (a1 == null || a2 == null) continue;
101                                                // C1'-C1' distance is one useful criteria
102                                                if (Math.abs(a1.getCoordsAsPoint3d().distance(a2.getCoordsAsPoint3d()) - 10.0) > 4.0) continue;
103                                                Pair<Group> ga = new Pair<>(g1, g2);
104                                                // TODO is this call needed?? JD 2018-03-07
105                                                @SuppressWarnings("unused")
106                                                Matrix4d data = basePairReferenceFrame(ga);
107                                                // if the stagger is greater than 2 Å, it's not really paired.
108                                                if (Math.abs(pairParameters[5]) > maxStagger) continue;
109                                                // similarly, extreme shear and stretch is not a good base pair
110                                                if (Math.abs(pairParameters[3]) > maxShear) continue;
111                                                if (Math.abs(pairParameters[4]) > maxStretch) continue;
112
113                                                // if the propeller is ridiculous it's also not that good of a pair.
114                                                if (Math.abs(pairParameters[1]) > maxPropeller) {
115                                                        continue;
116                                                }
117                                                result.add(ga);
118                                                pairingNames.add(useRNA ? BASE_LIST_RNA[type1] + BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1] + BASE_LIST_DNA[type2]);
119                                                foundPair = true;
120                                        }
121                                        if (!foundPair && lastFoundPair) {
122                                                if (pairSequence.length() > 0 && pairSequence.charAt(pairSequence.length() - 1) != ' ')
123                                                        pairSequence += ' ';
124                                        }
125                                        if (foundPair) pairSequence += (c.getAtomSequence().charAt(i));
126                                        lastFoundPair = foundPair;
127                                }
128                        }
129                }
130                return result;
131        }
132
133        /**
134         * This method returns the maximum stagger between bases used as criteria for the characterization of two bases as being paired.
135         * @return the maximum propeller ("propeller-twist", in degrees) allowed.
136         */
137        public double getMaxStagger() {
138                return maxStagger;
139        }
140
141        /**
142         * This method sets the maximum stagger allowed for a base pair, prior to analyze() call
143         * @param maxStagger The maximum propeller (in Å) allowed to consider two bases paired
144         */
145        public void setMaxStagger(double maxStagger) {
146                this.maxStagger = maxStagger;
147        }
148
149        /**
150         * This method returns the maximum shear between bases used as criteria for the characterization of two bases as being paired.
151         * @return the maximum shear (in Å) allowed.
152         */
153        public double getMaxShear() {
154                return maxShear;
155        }
156
157        /**
158         * This method sets the maximum shear allowed for a base pair, prior to analyze() call
159         * @param maxShear The maximum shear (in Å) allowed to consider two bases paired
160         */
161        public void setMaxShear(double maxShear) {
162                this.maxShear = maxShear;
163        }
164
165        /**
166         * This method returns the maximum stretch between bases used as criteria for the characterization of two bases as being paired.
167         * @return the maximum stretch (in Å) allowed.
168         */
169        public double getMaxStretch() {
170                return maxStretch;
171        }
172
173        /**
174         * This method sets the maximum stretch allowed for a base pair, prior to analyze() call.
175         * @param maxStretch The maximum stretch (in Å) allowed to consider two bases paired
176         */
177        public void setMaxStretch(double maxStretch) {
178                this.maxStretch = maxStretch;
179        }
180
181        /**
182         * This method returns the maximum propeller twist between bases used as criteria for the characterization of two bases as being paired.
183         * @return the maximum propeller ("propeller-twist", in degrees) allowed.
184         */
185        public double getMaxPropeller() {
186                return maxPropeller;
187        }
188
189        /**
190         * This method sets the maximum propeller allowed for a base pair, prior to analyze() call
191         * @param maxPropeller The maximum propeller ("propeller-twist", in degrees) allowed to consider two bases paired
192         */
193        public void setMaxPropeller(double maxPropeller) {
194                this.maxPropeller = maxPropeller;
195        }
196}