001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * 021 */ 022package org.biojava.nbio.structure.basepairs; 023 024import org.biojava.nbio.structure.Atom; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.Group; 027import org.biojava.nbio.structure.Structure; 028import org.biojava.nbio.structure.contact.Pair; 029 030import javax.vecmath.Matrix4d; 031import java.util.ArrayList; 032import java.util.List; 033 034/** 035 * This class allows for finding inter-strand base pairs that are not necessarily canonical Watson-Crick pairs. 036 * The implementation of findPair is different than that of the base class. This class does not consider intra-strand 037 * base pairing and for that, the TertiaryBasePairParameters class should be used. 038 * @author Luke Czapla 039 * @since 5.0.0 040 * 041 */ 042public class MismatchedBasePairParameters extends BasePairParameters { 043 044 private static final long serialVersionUID = 2837124340169886674L; 045 046 public static final double DEFAULT_MAX_STAGGER = 2.0; 047 public static final double DEFAULT_MAX_PROPELLER = 60.0; 048 public static final double DEFAULT_MAX_SHEAR = 5.0; 049 public static final double DEFAULT_MAX_STRETCH = 5.0; 050 051 // These are the criteria used to select proper base pairs. 052 private double maxStagger = DEFAULT_MAX_STAGGER, 053 maxShear = DEFAULT_MAX_SHEAR, 054 maxStretch = DEFAULT_MAX_STRETCH, 055 maxPropeller = DEFAULT_MAX_PROPELLER; 056 057 /** 058 * This constructor is used to create the TertiaryBasePairParameters object. The parent constructors are valid 059 * as well, but for this class, it makes the most sense to specify the exact parameters for the analysis. 060 * @param structure The Structure to analyze 061 * @param RNA Whether to analyze RNA (if false, it will analyze DNA) 062 * @param removeDups Whether to remove duplicate sequences (useful for RCSB data with redundant units). 063 * @param canonical Whether to only consider canonical Watson-Crick base pairs. If false, any pairing will be identified 064 * as long it falls below the maximum values of stagger, shear, and stretch. 065 */ 066 public MismatchedBasePairParameters(Structure structure, boolean RNA, boolean removeDups, boolean canonical) { 067 068 super(structure, RNA, removeDups, canonical); 069 070 } 071 072 /** 073 * This is an implementation for finding non-canonical base pairs when there may be missing or overhanging bases. 074 * @param chains The list of chains already found to be nucleic acids. 075 * @return The list of the atom groups (residues) that are pairs, as a Pair of nucleic acid Groups. 076 */ 077 @Override 078 public List<Pair<Group>> findPairs(List<Chain> chains) { 079 List<Pair<Group>> result = new ArrayList<>(); 080 boolean lastFoundPair = false; 081 for (int i = 0; i < chains.size(); i++) { 082 Chain c = chains.get(i); 083 String sequence = c.getAtomSequence(); 084 for (int m = 0; m < sequence.length(); m++) { 085 boolean foundPair = false; 086 Integer type1, type2; 087 for (int j = i + 1; j < chains.size() && !foundPair; j++) { 088 Chain c2 = chains.get(j); 089 if (j > i+1 && c.getAtomSequence().equals(c2.getAtomSequence()) && nonredundant) continue; 090 String sequence2 = c2.getAtomSequence(); 091 for (int k = c2.getAtomSequence().length() - 1; k >= 0 && !foundPair; k--) { 092 if (canonical && !BasePairParameters.match(sequence.charAt(m), sequence2.charAt(k), useRNA)) continue; 093 Group g1 = c.getAtomGroup(m); 094 Group g2 = c2.getAtomGroup(k); 095 type1 = BASE_MAP.get(g1.getPDBName()); 096 type2 = BASE_MAP.get(g2.getPDBName()); 097 if (type1 == null || type2 == null) continue; 098 Atom a1 = g1.getAtom("C1'"); 099 Atom a2 = g2.getAtom("C1'"); 100 if (a1 == null || a2 == null) continue; 101 // C1'-C1' distance is one useful criteria 102 if (Math.abs(a1.getCoordsAsPoint3d().distance(a2.getCoordsAsPoint3d()) - 10.0) > 4.0) continue; 103 Pair<Group> ga = new Pair<>(g1, g2); 104 // TODO is this call needed?? JD 2018-03-07 105 @SuppressWarnings("unused") 106 Matrix4d data = basePairReferenceFrame(ga); 107 // if the stagger is greater than 2 Å, it's not really paired. 108 if (Math.abs(pairParameters[5]) > maxStagger) continue; 109 // similarly, extreme shear and stretch is not a good base pair 110 if (Math.abs(pairParameters[3]) > maxShear) continue; 111 if (Math.abs(pairParameters[4]) > maxStretch) continue; 112 113 // if the propeller is ridiculous it's also not that good of a pair. 114 if (Math.abs(pairParameters[1]) > maxPropeller) { 115 continue; 116 } 117 result.add(ga); 118 pairingNames.add(useRNA ? BASE_LIST_RNA[type1] + BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1] + BASE_LIST_DNA[type2]); 119 foundPair = true; 120 } 121 if (!foundPair && lastFoundPair) { 122 if (pairSequence.length() > 0 && pairSequence.charAt(pairSequence.length() - 1) != ' ') 123 pairSequence += ' '; 124 } 125 if (foundPair) pairSequence += (c.getAtomSequence().charAt(i)); 126 lastFoundPair = foundPair; 127 } 128 } 129 } 130 return result; 131 } 132 133 /** 134 * This method returns the maximum stagger between bases used as criteria for the characterization of two bases as being paired. 135 * @return the maximum propeller ("propeller-twist", in degrees) allowed. 136 */ 137 public double getMaxStagger() { 138 return maxStagger; 139 } 140 141 /** 142 * This method sets the maximum stagger allowed for a base pair, prior to analyze() call 143 * @param maxStagger The maximum propeller (in Å) allowed to consider two bases paired 144 */ 145 public void setMaxStagger(double maxStagger) { 146 this.maxStagger = maxStagger; 147 } 148 149 /** 150 * This method returns the maximum shear between bases used as criteria for the characterization of two bases as being paired. 151 * @return the maximum shear (in Å) allowed. 152 */ 153 public double getMaxShear() { 154 return maxShear; 155 } 156 157 /** 158 * This method sets the maximum shear allowed for a base pair, prior to analyze() call 159 * @param maxShear The maximum shear (in Å) allowed to consider two bases paired 160 */ 161 public void setMaxShear(double maxShear) { 162 this.maxShear = maxShear; 163 } 164 165 /** 166 * This method returns the maximum stretch between bases used as criteria for the characterization of two bases as being paired. 167 * @return the maximum stretch (in Å) allowed. 168 */ 169 public double getMaxStretch() { 170 return maxStretch; 171 } 172 173 /** 174 * This method sets the maximum stretch allowed for a base pair, prior to analyze() call. 175 * @param maxStretch The maximum stretch (in Å) allowed to consider two bases paired 176 */ 177 public void setMaxStretch(double maxStretch) { 178 this.maxStretch = maxStretch; 179 } 180 181 /** 182 * This method returns the maximum propeller twist between bases used as criteria for the characterization of two bases as being paired. 183 * @return the maximum propeller ("propeller-twist", in degrees) allowed. 184 */ 185 public double getMaxPropeller() { 186 return maxPropeller; 187 } 188 189 /** 190 * This method sets the maximum propeller allowed for a base pair, prior to analyze() call 191 * @param maxPropeller The maximum propeller ("propeller-twist", in degrees) allowed to consider two bases paired 192 */ 193 public void setMaxPropeller(double maxPropeller) { 194 this.maxPropeller = maxPropeller; 195 } 196}