001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 *
021 */
022package org.biojava.nbio.structure.basepairs;
023
024import org.biojava.nbio.structure.Atom;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Group;
027import org.biojava.nbio.structure.Structure;
028import org.biojava.nbio.structure.contact.Pair;
029
030import javax.vecmath.Matrix4d;
031import java.util.ArrayList;
032import java.util.List;
033
034/**
035 * This class allows for finding inter-strand base pairs that are not necessarily canonical Watson-Crick pairs.
036 * The implementation of findPair is different than that of the base class.  This class does not consider intra-strand
037 * base pairing and for that, the TertiaryBasePairParameters class should be used.
038 * @author Luke Czapla
039 * @since 5.0.0
040 *
041 */
042public class MismatchedBasePairParameters extends BasePairParameters {
043
044        private static final long serialVersionUID = 2837124340169886674L;
045        
046        public static final double DEFAULT_MAX_STAGGER = 2.0;
047    public static final double DEFAULT_MAX_PROPELLER = 60.0;
048    public static final double DEFAULT_MAX_SHEAR = 5.0;
049    public static final double DEFAULT_MAX_STRETCH = 5.0;
050
051    // These are the criteria used to select proper base pairs.
052    private double maxStagger = DEFAULT_MAX_STAGGER,
053            maxShear = DEFAULT_MAX_SHEAR,
054            maxStretch = DEFAULT_MAX_STRETCH,
055            maxPropeller = DEFAULT_MAX_PROPELLER;
056
057    /**
058     * This constructor is used to create the TertiaryBasePairParameters object.  The parent constructors are valid
059     * as well, but for this class, it makes the most sense to specify the exact parameters for the analysis.
060     * @param structure The Structure to analyze
061     * @param RNA Whether to analyze RNA (if false, it will analyze DNA)
062     * @param removeDups Whether to remove duplicate sequences (useful for RCSB data with redundant units).
063     * @param canonical Whether to only consider canonical Watson-Crick base pairs.  If false, any pairing will be identified
064     *                  as long it falls below the maximum values of stagger, shear, and stretch.
065     */
066    public MismatchedBasePairParameters(Structure structure, boolean RNA, boolean removeDups, boolean canonical) {
067
068        super(structure, RNA, removeDups, canonical);
069
070    }
071
072    /**
073     * This is an implementation for finding non-canonical base pairs when there may be missing or overhanging bases.
074     * @param chains The list of chains already found to be nucleic acids.
075     * @return The list of the atom groups (residues) that are pairs, as a Pair of nucleic acid Groups.
076     */
077    @Override
078    public List<Pair<Group>> findPairs(List<Chain> chains) {
079        List<Pair<Group>> result = new ArrayList<>();
080        boolean lastFoundPair = false;
081        for (int i = 0; i < chains.size(); i++) {
082            Chain c = chains.get(i);
083            String sequence = c.getAtomSequence();
084            for (int m = 0; m < sequence.length(); m++) {
085                boolean foundPair = false;
086                Integer type1, type2;
087                for (int j = i + 1; j < chains.size() && !foundPair; j++) {
088                    Chain c2 = chains.get(j);
089                    if (j > i+1 && c.getAtomSequence().equals(c2.getAtomSequence()) && nonredundant) continue;
090                    String sequence2 = c2.getAtomSequence();
091                    for (int k = c2.getAtomSequence().length() - 1; k >= 0 && !foundPair; k--) {
092                        if (canonical && !BasePairParameters.match(sequence.charAt(m), sequence2.charAt(k), useRNA)) continue;
093                        Group g1 = c.getAtomGroup(m);
094                        Group g2 = c2.getAtomGroup(k);
095                        type1 = BASE_MAP.get(g1.getPDBName());
096                        type2 = BASE_MAP.get(g2.getPDBName());
097                        if (type1 == null || type2 == null) continue;
098                        Atom a1 = g1.getAtom("C1'");
099                        Atom a2 = g2.getAtom("C1'");
100                        if (a1 == null || a2 == null) continue;
101                        // C1'-C1' distance is one useful criteria
102                        if (Math.abs(a1.getCoordsAsPoint3d().distance(a2.getCoordsAsPoint3d()) - 10.0) > 4.0) continue;
103                        Pair<Group> ga = new Pair<>(g1, g2);
104                        // TODO is this call needed?? JD 2018-03-07
105                        @SuppressWarnings("unused")
106                                                Matrix4d data = basePairReferenceFrame(ga);
107                        // if the stagger is greater than 2 Å, it's not really paired.
108                        if (Math.abs(pairParameters[5]) > maxStagger) continue;
109                        // similarly, extreme shear and stretch is not a good base pair
110                        if (Math.abs(pairParameters[3]) > maxShear) continue;
111                        if (Math.abs(pairParameters[4]) > maxStretch) continue;
112
113                        // if the propeller is ridiculous it's also not that good of a pair.
114                        if (Math.abs(pairParameters[1]) > maxPropeller) {
115                            continue;
116                        }
117                        result.add(ga);
118                        pairingNames.add(useRNA ? BASE_LIST_RNA[type1] + BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1] + BASE_LIST_DNA[type2]);
119                        foundPair = true;
120                    }
121                    if (!foundPair && lastFoundPair) {
122                        if (pairSequence.length() > 0 && pairSequence.charAt(pairSequence.length() - 1) != ' ')
123                            pairSequence += ' ';
124                    }
125                    if (foundPair) pairSequence += (c.getAtomSequence().charAt(i));
126                    lastFoundPair = foundPair;
127                }
128            }
129        }
130        return result;
131    }
132
133    /**
134     * This method returns the maximum stagger between bases used as criteria for the characterization of two bases as being paired.
135     * @return the maximum propeller ("propeller-twist", in degrees) allowed.
136     */
137    public double getMaxStagger() {
138        return maxStagger;
139    }
140
141    /**
142     * This method sets the maximum stagger allowed for a base pair, prior to analyze() call
143     * @param maxStagger The maximum propeller (in Å) allowed to consider two bases paired
144     */
145    public void setMaxStagger(double maxStagger) {
146        this.maxStagger = maxStagger;
147    }
148
149    /**
150     * This method returns the maximum shear between bases used as criteria for the characterization of two bases as being paired.
151     * @return the maximum shear (in Å) allowed.
152     */
153    public double getMaxShear() {
154        return maxShear;
155    }
156
157    /**
158     * This method sets the maximum shear allowed for a base pair, prior to analyze() call
159     * @param maxShear The maximum shear (in Å) allowed to consider two bases paired
160     */
161    public void setMaxShear(double maxShear) {
162        this.maxShear = maxShear;
163    }
164
165    /**
166     * This method returns the maximum stretch between bases used as criteria for the characterization of two bases as being paired.
167     * @return the maximum stretch (in Å) allowed.
168     */
169    public double getMaxStretch() {
170        return maxStretch;
171    }
172
173    /**
174     * This method sets the maximum stretch allowed for a base pair, prior to analyze() call.
175     * @param maxStretch The maximum stretch (in Å) allowed to consider two bases paired
176     */
177    public void setMaxStretch(double maxStretch) {
178        this.maxStretch = maxStretch;
179    }
180
181    /**
182     * This method returns the maximum propeller twist between bases used as criteria for the characterization of two bases as being paired.
183     * @return the maximum propeller ("propeller-twist", in degrees) allowed.
184     */
185    public double getMaxPropeller() {
186        return maxPropeller;
187    }
188
189    /**
190     * This method sets the maximum propeller allowed for a base pair, prior to analyze() call
191     * @param maxPropeller The maximum propeller ("propeller-twist", in degrees) allowed to consider two bases paired
192     */
193    public void setMaxPropeller(double maxPropeller) {
194        this.maxPropeller = maxPropeller;
195    }
196}