001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 *
021 */
022package org.biojava.nbio.structure.basepairs;
023
024import org.biojava.nbio.structure.Structure;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Group;
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.contact.Pair;
029import org.biojava.nbio.structure.geometry.SuperPosition;
030import org.biojava.nbio.structure.geometry.SuperPositionQCP;
031import org.biojava.nbio.structure.io.PDBFileReader;
032
033import javax.vecmath.Matrix4d;
034import javax.vecmath.Point3d;
035import java.io.ByteArrayInputStream;
036import java.io.IOException;
037import java.io.Serializable;
038import java.util.List;
039import java.util.Locale;
040import java.util.Arrays;
041import java.util.ArrayList;
042import java.util.Map;
043import java.util.HashMap;
044
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048import static java.lang.Math.sin;
049import static java.lang.Math.cos;
050import static java.lang.Math.atan2;
051import static java.lang.Math.acos;
052import static java.lang.Math.PI;
053
054/**
055 * This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters for any nucleic
056 * acid containing structure that has the information about the core base-pair rings.
057 * Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858
058 *
059 * The method that is usually overridden is findPairs(), this base implementation is used for a large-scale
060 * analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost
061 * 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017).
062 * Those who study tertiary structures for RNA folding should use the TertiaryBasePairParameters,
063 * because this base class is only looking for base pairs between separate strands that exactly line up.
064 * To relax the lining up policy and allow for non-canonical base pairs, use the MismatchedBasePairParameters
065 * class, which will not consider intra-strand base pairing.
066 *
067 * @author Luke Czapla
068 * @since 5.0.0
069 *
070 */
071public class BasePairParameters implements Serializable {
072
073        private static final long serialVersionUID = 6214502385L;
074        private static Logger log = LoggerFactory.getLogger(BasePairParameters.class);
075
076        // See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html
077        // and the paper cited at the top of this class (also as Table 1).
078        // These are hard-coded to avoid problems with resource paths.
079        public static final String[] STANDARD_BASES = new String[] {
080                        "SEQRES   1 A    1  A\n" +
081                                        "ATOM      2  N9    A A   1      -1.291   4.498   0.000\n" +
082                                        "ATOM      3  C8    A A   1       0.024   4.897   0.000\n" +
083                                        "ATOM      4  N7    A A   1       0.877   3.902   0.000\n" +
084                                        "ATOM      5  C5    A A   1       0.071   2.771   0.000\n" +
085                                        "ATOM      6  C6    A A   1       0.369   1.398   0.000\n" +
086                                        "ATOM      8  N1    A A   1      -0.668   0.532   0.000\n" +
087                                        "ATOM      9  C2    A A   1      -1.912   1.023   0.000\n" +
088                                        "ATOM     10  N3    A A   1      -2.320   2.290   0.000\n" +
089                                        "ATOM     11  C4    A A   1      -1.267   3.124   0.000\n" +
090                                        "END",
091                        "SEQRES   1 A    1  G\n" +
092                                        "ATOM      2  N9    G A   1      -1.289   4.551   0.000\n" +
093                                        "ATOM      3  C8    G A   1       0.023   4.962   0.000\n" +
094                                        "ATOM      4  N7    G A   1       0.870   3.969   0.000\n" +
095                                        "ATOM      5  C5    G A   1       0.071   2.833   0.000\n" +
096                                        "ATOM      6  C6    G A   1       0.424   1.460   0.000\n" +
097                                        "ATOM      8  N1    G A   1      -0.700   0.641   0.000\n" +
098                                        "ATOM      9  C2    G A   1      -1.999   1.087   0.000\n" +
099                                        "ATOM     11  N3    G A   1      -2.342   2.364   0.001\n" +
100                                        "ATOM     12  C4    G A   1      -1.265   3.177   0.000\n" +
101                                        "END",
102                        "SEQRES   1 A    1  T\n" +
103                                        "ATOM      2  N1    T A   1      -1.284   4.500   0.000\n" +
104                                        "ATOM      3  C2    T A   1      -1.462   3.135   0.000\n" +
105                                        "ATOM      5  N3    T A   1      -0.298   2.407   0.000\n" +
106                                        "ATOM      6  C4    T A   1       0.994   2.897   0.000\n" +
107                                        "ATOM      8  C5    T A   1       1.106   4.338   0.000\n" +
108                                        "ATOM     10  C6    T A   1      -0.024   5.057   0.000\n" +
109                                        "END",
110                        "SEQRES   1 A    1  C\n" +
111                                        "ATOM      2  N1    C A   1      -1.285   4.542   0.000\n" +
112                                        "ATOM      3  C2    C A   1      -1.472   3.158   0.000\n" +
113                                        "ATOM      5  N3    C A   1      -0.391   2.344   0.000\n" +
114                                        "ATOM      6  C4    C A   1       0.837   2.868   0.000\n" +
115                                        "ATOM      8  C5    C A   1       1.056   4.275   0.000\n" +
116                                        "ATOM      9  C6    C A   1      -0.023   5.068   0.000\n" +
117                                        "END",
118                        "SEQRES   1 A    1  U\n" +
119                                        "ATOM      2  N1    U A   1      -1.284   4.500   0.000\n" +
120                                        "ATOM      3  C2    U A   1      -1.462   3.131   0.000\n" +
121                                        "ATOM      5  N3    U A   1      -0.302   2.397   0.000\n" +
122                                        "ATOM      6  C4    U A   1       0.989   2.884   0.000\n" +
123                                        "ATOM      8  C5    U A   1       1.089   4.311   0.000\n" +
124                                        "ATOM      9  C6    U A   1      -0.024   5.053   0.000\n"
125        };
126
127        // this is also hard-coded data about standard WC base pairs for both DNA and RNA
128        protected static final String[] BASE_LIST_DNA = {"A", "G", "T", "C"};
129        protected static final String[] BASE_LIST_RNA = {"A", "G", "U", "C"};
130        protected static final Map<String, Integer> BASE_MAP;
131        // private static List<String> RNAspecific = Arrays.asList("U", "URA"),
132        //        DNAspecific = Arrays.asList("DC", "C", "CYT");
133        protected static final Map<Integer, List<String>> RING_MAP;
134        static {
135                BASE_MAP = new HashMap<>();
136                BASE_MAP.put("DA", 0); BASE_MAP.put("ADE", 0); BASE_MAP.put("A", 0);
137                BASE_MAP.put("DG", 1); BASE_MAP.put("GUA", 1); BASE_MAP.put("G", 1);
138                BASE_MAP.put("DT", 2); BASE_MAP.put("THY", 2); BASE_MAP.put("T", 2); BASE_MAP.put("U", 2); BASE_MAP.put("URA", 2);
139                BASE_MAP.put("DC", 3); BASE_MAP.put("CYT", 3); BASE_MAP.put("C", 3);
140
141                RING_MAP = new HashMap<>();
142                RING_MAP.put(0, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
143                RING_MAP.put(1, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
144                RING_MAP.put(2, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
145                RING_MAP.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
146        }
147
148        protected Structure structure;
149        protected boolean canonical = true;
150        protected boolean useRNA = false;
151        protected boolean nonredundant = false;
152        protected double[] pairParameters;
153
154        // this is the main data that the user wants to get back out from the procedure.
155        protected String pairSequence = "";
156        protected double[][] pairingParameters;
157        protected double[][] stepParameters;
158        protected List<String> pairingNames = new ArrayList<>();
159        protected List<Matrix4d> referenceFrames = new ArrayList<>();
160
161
162        /**
163         * This constructor takes a Structure object, finds base pair and base-pair step parameters
164         * for double-helical regions within the structure.
165         * @param structure The already-loaded structure to analyze.
166         * @param useRNA whether to look for canonical RNA pairs.  By default (false) it analyzes DNA.
167         * @param removeDups whether to only look for base-pair parameters for each unique sequence in
168         *  the structure (if set to <i>true</i>)
169         * @param canonical Whether to consider only Watson-Crick base pairs
170         */
171        public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) {
172                this.structure = structure;
173                this.useRNA = useRNA;
174                this.canonical = canonical;
175                this.nonredundant = removeDups;
176
177        }
178
179        /**
180         * This constructor takes a Structure object, whether to use RNA, and whether to remove duplicate sequences.
181         * @param structure The already-loaded structure to analyze.
182         * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
183         * @param removeDups if true, duplicate sequences will not be considered.  This is for the analysis of X-ray structures from
184         *                   RCSB, where there may be identical or similar units.
185         */
186        public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
187                this(structure, useRNA, removeDups, false);
188        }
189
190        /**
191         * This constructor takes a Structure object, and whether to use the RNA standard bases.
192         * @param structure The already-loaded structure to analyze.
193         * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
194         */
195        public BasePairParameters(Structure structure, boolean useRNA) {
196                this(structure, useRNA, false, false);
197        }
198
199        /**
200         * This constructor takes a Structure object, finds base pair and base-pair step parameters
201         * for double-helical regions within the structure for only canonical DNA pairs.
202         * @param structure The already-loaded structure to analyze.
203         */
204        public BasePairParameters(Structure structure) {
205                this(structure, false, false, true);
206        }
207
208
209        /**
210         * This method is the main function call to extract all step parameters, pairing parameters, and sequence
211         * information from the Structure object provided to the constructor.
212         * @return This same object with the populated data, convenient for output
213         *  (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>)
214         */
215        public BasePairParameters analyze() {
216                if (structure == null) {
217                        pairingParameters = null;
218                        stepParameters = null;
219                        return this;
220                }
221                List<Chain> nucleics = this.getNucleicChains(nonredundant);
222                List<Pair<Group>> pairs = this.findPairs(nucleics);
223                this.pairingParameters = new double[pairs.size()][6];
224                this.stepParameters = new double[pairs.size()][6];
225                Matrix4d lastStep;
226                Matrix4d currentStep = null;
227                for (int i = 0; i < pairs.size(); i++) {
228                        lastStep = currentStep;
229                        currentStep = this.basePairReferenceFrame(pairs.get(i));
230                        referenceFrames.add((Matrix4d)currentStep.clone());
231                        for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j];
232                        if (i != 0) {
233                                lastStep.invert();
234                                lastStep.mul(currentStep);
235                                double[] sparms = calculateTp(lastStep);
236                                for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
237                        }
238                }
239                return this;
240        }
241
242
243
244        /**
245         * This method returns the total number of base pairs that were found, used after the call to analyze().
246         * @return An integer value, number of base pairs
247         */
248        public int getLength() {
249                if (structure == null || pairParameters == null) throw new IllegalArgumentException("This structure is not analyzed or not initialized.");
250                return pairingParameters.length;
251        }
252
253
254        /**
255         * This method reports all the pair parameters, in the order of:
256         * buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å).
257         * @return A double[][] with length equal to number of base pairs for rows, and 6 columns
258         */
259        public double[][] getPairingParameters() {
260                return pairingParameters;
261        }
262
263        /**
264         * This method reports all the base-pair step parameters, in the order of:
265         * tilt, roll, twist (in degrees), shift, slide, rise (in Å).
266         * @return A double[][] with length equal to number of base pairs (the first row 0 has no step
267         *  and therefore is six zeroes), and 6 columns.
268         */
269        public double[][] getStepParameters() {
270                return stepParameters;
271        }
272
273
274        /**
275         * This method returns the primary strand's sequence where parameters were found.
276         * There are spaces in the string anywhere there was a break in the helix or when
277         * it goes from one helix to another helix in the structure. (the "step" is still returned)
278         * @return String of primary sequence with spaces between gaps and new helices.
279         */
280        public String getPairSequence() {
281                return pairSequence;
282        }
283
284
285        /**
286         * This method returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the
287         * list.  The first character is the leading strand base and the second character is the complementary base
288         * @return
289         */
290        public List<String> getPairingNames() {
291                return pairingNames;
292        }
293
294        public List<Matrix4d> getReferenceFrames() {
295                return referenceFrames;
296        }
297
298        /**
299         * This method is an internal test that the base pair specified is within a valid range.  If not, it throws an exception
300         * with a message.
301         * @param bp The index of the base pair or base-pair step to return.
302         */
303        private void checkArgument(int bp) {
304                if (bp < 0 || bp >= getPairingParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
305        }
306
307        /**
308         * This method returns the buckle in degrees for the given base pair
309         * @param bp the number of the base pair (starting with 0)
310         * @return the value as a double (in degrees)
311         */
312        public Double getBuckle(int bp) {
313                checkArgument(bp);
314                return pairingParameters[bp][0];
315        }
316
317        /**
318         * This method returns the propeller ("propeller-twist") in degrees for the given base pair
319         * @param bp the number of the base pair (starting with 0)
320         * @return the value as a double (in degrees)
321         */
322        public Double getPropeller(int bp) {
323                checkArgument(bp);
324                return pairingParameters[bp][1];
325        }
326
327        /**
328         * This method returns the opening in degrees for the given base pair
329         * @param bp the number of the base pair (starting with 0)
330         * @return the value as a double (in degrees)
331         */
332        public Double getOpening(int bp) {
333                checkArgument(bp);
334                return pairingParameters[bp][2];
335        }
336
337        /**
338         * This method returns the shear in Å for the given base pair
339         * @param bp the number of the base pair (starting with 0)
340         * @return the value as a double (in Å)
341         */
342        public Double getShear(int bp) {
343                checkArgument(bp);
344                return pairingParameters[bp][3];
345        }
346
347        /**
348         * This method returns the stretch in Å for the given base pair
349         * @param bp the number of the base pair (starting with 0)
350         * @return the value as a double (in Å)
351         */
352        public Double getStretch(int bp) {
353                checkArgument(bp);
354                return pairingParameters[bp][4];
355        }
356
357        /**
358         * This method returns the stagger in Å for the given base pair
359         * @param bp the number of the base pair (starting with 0)
360         * @return the value as a double (in Å)
361         */
362        public Double getStagger(int bp) {
363                checkArgument(bp);
364                return pairingParameters[bp][5];
365        }
366
367        /**
368         * This method returns the tilt for the given base pair, relative to the one before it.
369         * @param bp the number of the base pair (starting with 0)
370         * @return the value as a double (in degrees)
371         */
372        public Double getTilt(int bp) {
373                checkArgument(bp);
374                return stepParameters[bp][0];
375        }
376
377        /**
378         * This method returns the roll for the given base pair, relative to the one before it.
379         * @param bp the number of the base pair (starting with 0)
380         * @return the value as a double (in degrees)
381         */
382        public Double getRoll(int bp) {
383                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
384                return stepParameters[bp][1];
385        }
386
387        /**
388         * This method returns the twist for the given base pair, relative to the one before it.
389         * @param bp the number of the base pair (starting with 0)
390         * @return the value as a double (in degrees)
391         */
392        public Double getTwist(int bp) {
393                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
394                return stepParameters[bp][2];
395        }
396
397        /**
398         * Return the shift for the given base pair, relative to the one before it.
399         * @param bp the number of the base pair (starting with 0)
400         * @return the value as a double (in Å)
401         */
402        public Double getShift(int bp) {
403                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
404                return stepParameters[bp][3];
405        }
406
407        /**
408         * This method returns the slide for the given base pair, relative to the one before it.
409         * @param bp the number of the base pair (starting with 0)
410         * @return the value as a double (in Å)
411         */
412        public Double getSlide(int bp) {
413                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
414                return stepParameters[bp][4];
415        }
416
417        /**
418         * This method returns the rise for the given base pair, relative to the one before it.
419         * @param bp the number of the base pair (starting with 0)
420         * @return the value as a double (in Å)
421         */
422        public Double getRise(int bp) {
423                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
424                return stepParameters[bp][5];
425        }
426
427
428        /**
429         * This method reports all the nucleic acid chains and has an option to remove duplicates if you
430         * are considering an analysis of only unique DNA or RNA helices in the Structure.
431         * @param removeDups If true, it will ignore duplicate chains
432         * @return A list of all the nucleic acid chains in order of the Structure
433         */
434        public List<Chain> getNucleicChains(boolean removeDups) {
435                if (structure == null) return new ArrayList<>();
436                List<Chain> chains = structure.getChains();
437                List<Chain> result = new ArrayList<>();
438                for (Chain c: chains) {
439                        if (c.isNucleicAcid()) {
440                                result.add(c);
441                        }
442                }
443                if (removeDups) for (int i = 0; i < result.size(); i++) {
444                        for (int j = i+2; j < result.size(); j++) {
445                                // remove duplicate sequences (structures with two or more identical units)
446                                if (result.get(i).getAtomSequence().equals(result.get(j).getAtomSequence())) {
447                                        result.remove(j);
448                                }
449                        }
450                }
451                return result;
452        }
453
454        /**
455         * This method performs a search for base pairs in the structure.  The criteria is alignment of
456         * sequences and the canonical base pairs of DNA or RNA. Use MismatchedBasePairParameters
457         * or TertiaryBasePairParameters for finding higher-order associations.
458         * @param chains The list of chains already found to be nucleic acids
459         * @return The list of corresponding Watson-Crick groups as pairs, as a Pair of nucleic acid Groups
460         */
461        public List<Pair<Group>> findPairs(List<Chain> chains) {
462                List<Pair<Group>> result = new ArrayList<>();
463                for (int i = 0; i < chains.size(); i++) {
464                        Chain c = chains.get(i);
465                        for (int j = i+1; j < chains.size(); j++) {
466                                String complement = complement(chains.get(j).getAtomSequence(), useRNA);
467                                String match = longestCommonSubstring(c.getAtomSequence(), complement);
468                                if (log.isDebugEnabled()) {
469                                        log.debug(c.getAtomSequence() + " " + chains.get(j).getAtomSequence() + " " + match);
470                                }
471                                int index1 = c.getAtomSequence().indexOf(match);
472                                int index2 = complement.length() - complement.indexOf(match) - 1;
473                                for (int k = 0; k < match.length(); k++) {
474                                        Group g1 = c.getAtomGroup(index1+k);
475                                        Group g2 = chains.get(j).getAtomGroup(index2-k);
476                                        Integer type1 = BASE_MAP.get(g1.getPDBName());
477                                        Integer type2 = BASE_MAP.get(g2.getPDBName());
478                                        if (type1 == null || type2 == null) {
479                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
480                                                continue;
481                                        }
482                                        Atom a1 = g1.getAtom(RING_MAP.get(type1).get(0));
483                                        Atom a2 = g2.getAtom(RING_MAP.get(type2).get(0));
484
485                                        if (a1 == null) {
486                                                log.info("Error processing " + g1.getPDBName());
487                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
488                                                continue;
489                                        }
490                                        if (a2 == null) {
491                                                log.info("Error processing " + g2.getPDBName());
492                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
493                                                continue;
494                                        }
495
496                                        double dx = a1.getX()-a2.getX();
497                                        double dy = a1.getY()-a2.getY();
498                                        double dz = a1.getZ()-a2.getZ();
499                                        double distance = Math.sqrt(dx*dx+dy*dy+dz*dz);
500                                        //log.info("C8-C6 Distance (Å): " + distance);
501                                        // could be a base pair
502                                        if (Math.abs(distance-10.0) < 4.0) {
503                                                boolean valid = true;
504                                                for (String atomname : RING_MAP.get(type1)) {
505                                                        Atom a = g1.getAtom(atomname);
506                                                        if (a == null) valid = false;
507                                                }
508                                                if (valid) for (String atomname: RING_MAP.get(type2)) {
509                                                        Atom a = g2.getAtom(atomname);
510                                                        if (a == null) valid = false;
511                                                }
512                                                if (valid) {
513                                                        result.add(new Pair<Group>(g1, g2));
514                                                        pairingNames.add((useRNA ? BASE_LIST_RNA[type1]+ BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1]+ BASE_LIST_DNA[type2]));
515                                                        pairSequence += c.getAtomSequence().charAt(index1 + k);
516                                                } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
517                                        } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
518                                }
519                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
520                        }
521                        //log.info();
522                }
523                log.info("Matched: " + pairSequence);
524                return result;
525        }
526
527
528        /**
529         * This method calculates the central frame (4x4 transformation matrix) of a single base pair.
530         * @param pair An array of the two groups that make a hypothetical pair
531         * @return The middle frame of the center of the base-pair formed
532         */
533        public Matrix4d basePairReferenceFrame(Pair<Group> pair) {
534                Integer type1 = BASE_MAP.get(pair.getFirst().getPDBName());
535                Integer type2 = BASE_MAP.get(pair.getSecond().getPDBName());
536                SuperPosition sp = new SuperPositionQCP(true);
537                if (type1 == null || type2 == null) return null;
538                PDBFileReader pdbFileReader = new PDBFileReader();
539                Structure s1, s2;
540                try {
541                        s1 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type1].getBytes()));
542                        s2 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type2].getBytes()));
543                } catch (IOException e) {
544                        e.printStackTrace();
545                        return null;
546                }
547                Group std1 = s1.getChain("A").getAtomGroup(0);
548                Group std2 = s2.getChain("A").getAtomGroup(0);
549
550                Point3d[] pointref = new Point3d[std1.getAtoms().size()];
551                Point3d[] pointact = new Point3d[std1.getAtoms().size()];
552                int count = 0;
553
554                for (Atom a : std1.getAtoms()) {
555                        if (pair.getFirst().getAtom(a.getName()) == null) return null;
556                        pointref[count] = a.getCoordsAsPoint3d();
557                        pointact[count] = pair.getFirst().getAtom(a.getName()).getCoordsAsPoint3d();
558                        count++;
559                }
560                assert count == std1.getAtoms().size();
561                Matrix4d ref1 = (Matrix4d)sp.superposeAndTransform(pointact, pointref).clone();
562
563                pointref = new Point3d[std2.getAtoms().size()];
564                pointact = new Point3d[std2.getAtoms().size()];
565
566                count = 0;
567                for (Atom a : std2.getAtoms()) {
568                        if (pair.getSecond().getAtom(a.getName()) == null) return null;
569                        pointref[count] = a.getCoordsAsPoint3d();
570                        pointact[count] = pair.getSecond().getAtom(a.getName()).getCoordsAsPoint3d();
571                        count++;
572                }
573                assert count == std2.getAtoms().size();
574
575                Matrix4d temp = (Matrix4d)ref1.clone();
576                Matrix4d temp2 = (Matrix4d)temp.clone();
577                Matrix4d ref2 = sp.superposeAndTransform(pointact, pointref);
578
579                double[][] v = new double[3][4];
580                double[] y3 = new double[4];
581                double[] z3 = new double[4];
582                ref2.getColumn(1, y3);
583                ref2.getColumn(2, z3);
584                double[] z31 = new double[4];
585                ref1.getColumn(2, z31);
586                if (z3[0]*z31[0]+z3[1]*z31[1]+z3[2]*z31[2] < 0.0) {
587                        for (int i = 0; i < 3; i++) {
588                                y3[i] *= -1.0;
589                                z3[i] *= -1.0;
590                        }
591                }
592                ref2.setColumn(1, y3);
593                ref2.setColumn(2, z3);
594
595                temp.add(ref2);
596                temp.mul(0.5);
597                double[] x3 = new double[4];
598                temp.getColumn(0, x3);
599                temp.getColumn(1, y3);
600                temp.getColumn(2, z3);
601                x3 = removeComponent(x3, z3);
602                x3 = removeComponent(x3, y3);
603                y3 = removeComponent(y3, z3);
604                temp.setColumn(0, x3);
605                temp.setColumn(1, y3);
606                temp.setColumn(2, z3);
607
608                // normalize the short, long, and normal axes
609                for (int i = 0; i < 3; i++) {
610                        temp.getColumn(i, v[i]);
611                        double r = Math.sqrt(v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]);
612                        for (int j = 0; j < 3; j++) {
613                                v[i][j] /= r;
614                        }
615                        temp.setColumn(i, v[i]);
616                }
617
618                // calculate pairing parameters: buckle, propeller, opening, shear, stretch, stagger
619                temp2.invert();
620                temp2.mul(ref2);
621                pairParameters = calculateTp(temp2);
622                for (int i = 0; i < 6; i++) pairParameters[i] *= -1;
623
624                // return the central frame of the base pair
625                return temp;
626
627        }
628
629
630        @Override
631        public String toString() {
632                if (getPairingParameters() == null) return "No data";
633                StringBuilder result = new StringBuilder(10000);
634                result.append(pairingParameters.length + " base pairs\n");
635                result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n");
636                for (int i = 0; i < pairingParameters.length; i++) {
637                        result.append(pairingNames.get(i)+": ");
638                        for (int j = 0; j < 6; j++)
639                                result.append(String.format(Locale.US, "%5.4f", pairingParameters[i][j]) + " ");
640                        for (int j = 0; j < 6; j++)
641                                result.append(String.format(Locale.US, "%5.4f", stepParameters[i][j]) + " ");
642                        result.append("\n");
643                }
644                return result.toString();
645        }
646
647
648        // The following methods are just helper classes for the rapid analyze of base-pair geometry.
649        /**
650         * This method calculates pairing and step parameters from 4x4 transformation matrices (used internally)
651         * that comes out as a Matrix4d.
652         * @param input the 4x4 matrix representing the transformation from strand II -> strand I or pair i to pair i+1
653         * @return Six parameters as double[6]
654         */
655        public static double[] calculateTp(Matrix4d input) {
656
657                double[][] A = new double[4][4];
658                for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) {
659                        A[i][j] = input.getElement(i, j);
660                }
661                double[] M = new double[6];
662
663                double cosgamma, gamma, phi, omega, sgcp, omega2_minus_phi,
664                                sm, cm, sp, cp, sg, cg;
665
666                cosgamma = A[2][2];
667                if (cosgamma > 1.0) cosgamma = 1.0;
668                else if (cosgamma < -1.0) cosgamma = -1.0;
669
670                gamma = acos(cosgamma);
671
672                sgcp = A[1][1]*A[0][2]-A[0][1]*A[1][2];
673
674                if (gamma == 0.0) omega = -atan2(A[0][1],A[1][1]);
675                else omega = atan2(A[2][1]*A[0][2]+sgcp*A[1][2],sgcp*A[0][2]-A[2][1]*A[1][2]);
676
677                omega2_minus_phi = atan2(A[1][2],A[0][2]);
678
679                phi = omega/2.0 - omega2_minus_phi;
680
681                M[0] = gamma*sin(phi)*180.0/PI;
682                M[1] = gamma*cos(phi)*180.0/PI;
683                M[2] = omega*180.0/PI;
684
685                sm = sin(omega/2.0-phi);
686                cm = cos(omega/2.0-phi);
687                sp = sin(phi);
688                cp = cos(phi);
689                sg = sin(gamma/2.0);
690                cg = cos(gamma/2.0);
691
692                M[3] = (cm*cg*cp-sm*sp)*A[0][3]+(sm*cg*cp+cm*sp)*A[1][3]-sg*cp*A[2][3];
693                M[4] = (-cm*cg*sp-sm*cp)*A[0][3]+(-sm*cg*sp+cm*cp)*A[1][3]+sg*sp*A[2][3];
694                M[5] = (cm*sg)*A[0][3]+(sm*sg)*A[1][3]+cg*A[2][3];
695
696                return M;
697
698        }
699
700        /**
701         * This method returns the complement of a base. (used internally)
702         * @param base The letter of the base
703         * @param RNA Whether it is RNA (if false, it is DNA)
704         * @return The character representing the complement of the base
705         */
706        protected static char complementBase(char base, boolean RNA) {
707                if (base == 'A' && RNA) return 'U';
708                if (base == 'A') return 'T';
709                if (base == 'T' && !RNA) return 'A';
710                if (base == 'U' && RNA) return 'A';
711                if (base == 'C') return 'G';
712                if (base == 'G') return 'C';
713                return ' ';
714        }
715
716        /**
717         * Simple helper method for quickly checking the complement of a sequence, see also DNASequence nad RNASequence classes
718         * for more extensively useful functions not used in this narrow context of structural biology of base pairs.  (Used internally)
719         */
720        private static String complement(String sequence, boolean RNA) {
721                String result = "";
722                for (int i = sequence.length() - 1; i >= 0; i--) {
723                        result += complementBase(sequence.charAt(i), RNA);
724                }
725                return result;
726        }
727
728        /**
729         * This does a 3D Vector cross product of two vectors as double arrays. (used internally)
730         *
731         * @param a An array of length 3 or 4 (4th component is ignored)
732         * @param b An array of length 3 or 4 (4th component is ignored)
733         * @return The cross product of the vectors (just the first three components
734         */
735        @SuppressWarnings("unused")
736        private static double[] cross(double[] a, double[] b) {
737                assert a.length >= 3 && b.length >= 3;
738                double[] result = new double[4];
739                result[0] = a[1]*b[2]-a[2]*b[1];
740                result[1] = a[2]*b[0]-a[0]*b[2];
741                result[2] = a[0]*b[1]-a[1]*b[0];
742                return result;
743        }
744
745        /**
746         * This method removes any component of vector a that is along vector b. (used internally)
747         * @param a The array (vector) to remove component from
748         * @param b The component array (vector) to remove from the first
749         * @return The original array a with any component along b removed from it.
750         */
751        private static double[] removeComponent(double[] a, double[] b) {
752                double dot = 0;
753                double[] result = new double[4];
754                for (int i = 0; i < 3; i++) {
755                        dot += a[i]*b[i];
756                }
757                for (int i = 0; i < 3; i++) {
758                        result[i] = a[i]-dot*b[i];
759                }
760                return result;
761
762        }
763
764        /**
765         * This method finds the longest common substring between two strings. (used internally)
766         * @param s1 The first string
767         * @param s2 The second string
768         * @return The substring itself
769         */
770        private static String longestCommonSubstring(String s1, String s2) {
771                int start = 0;
772                int max = 0;
773                for (int i = 0; i < s1.length(); i++) {
774                        for (int j = 0; j < s2.length(); j++) {
775                                int x = 0;
776                                while (s1.charAt(i + x) == s2.charAt(j + x)) {
777                                        x++;
778                                        if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break;
779                                }
780                                if (x > max) {
781                                        max = x;
782                                        start = i;
783                                }
784                        }
785                }
786                return s1.substring(start, (start + max));
787        }
788
789        /**
790         * This returns true if a is the complement of b, false otherwise. (used internally)
791         * @param a First letter
792         * @param b Potential matching letter
793         * @param RNA Whether it is RNA (if false, DNA rules are used)
794         * @return True if the bases are complementary.
795         */
796        protected static boolean match(char a, char b, boolean RNA) {
797                if (a == 'A' && b == 'T' && !RNA) return true;
798                if (a == 'A' && b == 'U' && RNA) return true;
799                if (a == 'T' && b == 'A' && !RNA) return true;
800                if (a == 'U' && b == 'A' && RNA) return true;
801                if (a == 'G' && b == 'C') return true;
802                if (a == 'C' && b == 'G') return true;
803                return false;
804        }
805}