001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 *
021 */
022package org.biojava.nbio.structure.basepairs;
023
024import org.biojava.nbio.structure.Structure;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Group;
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.contact.Pair;
029import org.biojava.nbio.structure.geometry.SuperPosition;
030import org.biojava.nbio.structure.geometry.SuperPositionQCP;
031import org.biojava.nbio.structure.io.PDBFileReader;
032
033import javax.vecmath.Matrix4d;
034import javax.vecmath.Point3d;
035import java.io.ByteArrayInputStream;
036import java.io.IOException;
037import java.io.Serializable;
038import java.util.List;
039import java.util.Arrays;
040import java.util.ArrayList;
041import java.util.Map;
042import java.util.HashMap;
043
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import static java.lang.Math.sin;
048import static java.lang.Math.cos;
049import static java.lang.Math.atan2;
050import static java.lang.Math.acos;
051import static java.lang.Math.PI;
052
053/**
054 * This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters for any nucleic
055 * acid containing structure that has the information about the core base-pair rings.
056 * Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858
057 *
058 * The method that is usually overridden is findPairs(), this base implementation is used for a large-scale
059 * analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost
060 * 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017).
061 * Those who study tertiary structures for RNA folding should use the TertiaryBasePairParameters,
062 * because this base class is only looking for base pairs between separate strands that exactly line up.
063 * To relax the lining up policy and allow for non-canonical base pairs, use the MismatchedBasePairParameters
064 * class, which will not consider intra-strand base pairing.
065 *
066 * @author Luke Czapla
067 * @since 5.0.0
068 *
069 */
070public class BasePairParameters implements Serializable {
071
072        private static final long serialVersionUID = 6214502385L;
073        private static Logger log = LoggerFactory.getLogger(BasePairParameters.class);
074
075        // See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html
076        // and the paper cited at the top of this class (also as Table 1).
077        // These are hard-coded to avoid problems with resource paths.
078        public static final String[] STANDARD_BASES = new String[] {
079                        "SEQRES   1 A    1  A\n" +
080                                        "ATOM      2  N9    A A   1      -1.291   4.498   0.000\n" +
081                                        "ATOM      3  C8    A A   1       0.024   4.897   0.000\n" +
082                                        "ATOM      4  N7    A A   1       0.877   3.902   0.000\n" +
083                                        "ATOM      5  C5    A A   1       0.071   2.771   0.000\n" +
084                                        "ATOM      6  C6    A A   1       0.369   1.398   0.000\n" +
085                                        "ATOM      8  N1    A A   1      -0.668   0.532   0.000\n" +
086                                        "ATOM      9  C2    A A   1      -1.912   1.023   0.000\n" +
087                                        "ATOM     10  N3    A A   1      -2.320   2.290   0.000\n" +
088                                        "ATOM     11  C4    A A   1      -1.267   3.124   0.000\n" +
089                                        "END",
090                        "SEQRES   1 A    1  G\n" +
091                                        "ATOM      2  N9    G A   1      -1.289   4.551   0.000\n" +
092                                        "ATOM      3  C8    G A   1       0.023   4.962   0.000\n" +
093                                        "ATOM      4  N7    G A   1       0.870   3.969   0.000\n" +
094                                        "ATOM      5  C5    G A   1       0.071   2.833   0.000\n" +
095                                        "ATOM      6  C6    G A   1       0.424   1.460   0.000\n" +
096                                        "ATOM      8  N1    G A   1      -0.700   0.641   0.000\n" +
097                                        "ATOM      9  C2    G A   1      -1.999   1.087   0.000\n" +
098                                        "ATOM     11  N3    G A   1      -2.342   2.364   0.001\n" +
099                                        "ATOM     12  C4    G A   1      -1.265   3.177   0.000\n" +
100                                        "END",
101                        "SEQRES   1 A    1  T\n" +
102                                        "ATOM      2  N1    T A   1      -1.284   4.500   0.000\n" +
103                                        "ATOM      3  C2    T A   1      -1.462   3.135   0.000\n" +
104                                        "ATOM      5  N3    T A   1      -0.298   2.407   0.000\n" +
105                                        "ATOM      6  C4    T A   1       0.994   2.897   0.000\n" +
106                                        "ATOM      8  C5    T A   1       1.106   4.338   0.000\n" +
107                                        "ATOM     10  C6    T A   1      -0.024   5.057   0.000\n" +
108                                        "END",
109                        "SEQRES   1 A    1  C\n" +
110                                        "ATOM      2  N1    C A   1      -1.285   4.542   0.000\n" +
111                                        "ATOM      3  C2    C A   1      -1.472   3.158   0.000\n" +
112                                        "ATOM      5  N3    C A   1      -0.391   2.344   0.000\n" +
113                                        "ATOM      6  C4    C A   1       0.837   2.868   0.000\n" +
114                                        "ATOM      8  C5    C A   1       1.056   4.275   0.000\n" +
115                                        "ATOM      9  C6    C A   1      -0.023   5.068   0.000\n" +
116                                        "END",
117                        "SEQRES   1 A    1  U\n" +
118                                        "ATOM      2  N1    U A   1      -1.284   4.500   0.000\n" +
119                                        "ATOM      3  C2    U A   1      -1.462   3.131   0.000\n" +
120                                        "ATOM      5  N3    U A   1      -0.302   2.397   0.000\n" +
121                                        "ATOM      6  C4    U A   1       0.989   2.884   0.000\n" +
122                                        "ATOM      8  C5    U A   1       1.089   4.311   0.000\n" +
123                                        "ATOM      9  C6    U A   1      -0.024   5.053   0.000\n"
124        };
125
126        // this is also hard-coded data about standard WC base pairs for both DNA and RNA
127        protected static final String[] BASE_LIST_DNA = {"A", "G", "T", "C"};
128        protected static final String[] BASE_LIST_RNA = {"A", "G", "U", "C"};
129        protected static final Map<String, Integer> BASE_MAP;
130        // private static List<String> RNAspecific = Arrays.asList("U", "URA"),
131        //        DNAspecific = Arrays.asList("DC", "C", "CYT");
132        protected static final Map<Integer, List<String>> RING_MAP;
133        static {
134                BASE_MAP = new HashMap<>();
135                BASE_MAP.put("DA", 0); BASE_MAP.put("ADE", 0); BASE_MAP.put("A", 0);
136                BASE_MAP.put("DG", 1); BASE_MAP.put("GUA", 1); BASE_MAP.put("G", 1);
137                BASE_MAP.put("DT", 2); BASE_MAP.put("THY", 2); BASE_MAP.put("T", 2); BASE_MAP.put("U", 2); BASE_MAP.put("URA", 2);
138                BASE_MAP.put("DC", 3); BASE_MAP.put("CYT", 3); BASE_MAP.put("C", 3);
139
140                RING_MAP = new HashMap<>();
141                RING_MAP.put(0, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
142                RING_MAP.put(1, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
143                RING_MAP.put(2, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
144                RING_MAP.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
145        }
146
147        protected Structure structure;
148        protected boolean canonical = true;
149        protected boolean useRNA = false;
150        protected boolean nonredundant = false;
151        protected double[] pairParameters;
152
153        // this is the main data that the user wants to get back out from the procedure.
154        protected String pairSequence = "";
155        protected double[][] pairingParameters;
156        protected double[][] stepParameters;
157        protected List<String> pairingNames = new ArrayList<>();
158        protected List<Matrix4d> referenceFrames = new ArrayList<>();
159
160
161        /**
162         * This constructor takes a Structure object, finds base pair and base-pair step parameters
163         * for double-helical regions within the structure.
164         * @param structure The already-loaded structure to analyze.
165         * @param useRNA whether to look for canonical RNA pairs.  By default (false) it analyzes DNA.
166         * @param removeDups whether to only look for base-pair parameters for each unique sequence in
167         *  the structure (if set to <i>true</i>)
168         * @param canonical Whether to consider only Watson-Crick base pairs
169         */
170        public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) {
171                this.structure = structure;
172                this.useRNA = useRNA;
173                this.canonical = canonical;
174                this.nonredundant = removeDups;
175
176        }
177
178        /**
179         * This constructor takes a Structure object, whether to use RNA, and whether to remove duplicate sequences.
180         * @param structure The already-loaded structure to analyze.
181         * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
182         * @param removeDups if true, duplicate sequences will not be considered.  This is for the analysis of X-ray structures from
183         *                   RCSB, where there may be identical or similar units.
184         */
185        public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
186                this(structure, useRNA, removeDups, false);
187        }
188
189        /**
190         * This constructor takes a Structure object, and whether to use the RNA standard bases.
191         * @param structure The already-loaded structure to analyze.
192         * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
193         */
194        public BasePairParameters(Structure structure, boolean useRNA) {
195                this(structure, useRNA, false, false);
196        }
197
198        /**
199         * This constructor takes a Structure object, finds base pair and base-pair step parameters
200         * for double-helical regions within the structure for only canonical DNA pairs.
201         * @param structure The already-loaded structure to analyze.
202         */
203        public BasePairParameters(Structure structure) {
204                this(structure, false, false, true);
205        }
206
207
208        /**
209         * This method is the main function call to extract all step parameters, pairing parameters, and sequence
210         * information from the Structure object provided to the constructor.
211         * @return This same object with the populated data, convenient for output
212         *  (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>)
213         */
214        public BasePairParameters analyze() {
215                if (structure == null) {
216                        pairingParameters = null;
217                        stepParameters = null;
218                        return this;
219                }
220                List<Chain> nucleics = this.getNucleicChains(nonredundant);
221                List<Pair<Group>> pairs = this.findPairs(nucleics);
222                this.pairingParameters = new double[pairs.size()][6];
223                this.stepParameters = new double[pairs.size()][6];
224                Matrix4d lastStep;
225                Matrix4d currentStep = null;
226                for (int i = 0; i < pairs.size(); i++) {
227                        lastStep = currentStep;
228                        currentStep = this.basePairReferenceFrame(pairs.get(i));
229                        referenceFrames.add((Matrix4d)currentStep.clone());
230                        for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j];
231                        if (i != 0) {
232                                lastStep.invert();
233                                lastStep.mul(currentStep);
234                                double[] sparms = calculateTp(lastStep);
235                                for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
236                        }
237                }
238                return this;
239        }
240
241
242
243        /**
244         * This method returns the total number of base pairs that were found, used after the call to analyze().
245         * @return An integer value, number of base pairs
246         */
247        public int getLength() {
248                if (structure == null || pairParameters == null) throw new IllegalArgumentException("This structure is not analyzed or not initialized.");
249                return pairingParameters.length;
250        }
251
252
253        /**
254         * This method reports all the pair parameters, in the order of:
255         * buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å).
256         * @return A double[][] with length equal to number of base pairs for rows, and 6 columns
257         */
258        public double[][] getPairingParameters() {
259                return pairingParameters;
260        }
261
262        /**
263         * This method reports all the base-pair step parameters, in the order of:
264         * tilt, roll, twist (in degrees), shift, slide, rise (in Å).
265         * @return A double[][] with length equal to number of base pairs (the first row 0 has no step
266         *  and therefore is six zeroes), and 6 columns.
267         */
268        public double[][] getStepParameters() {
269                return stepParameters;
270        }
271
272
273        /**
274         * This method returns the primary strand's sequence where parameters were found.
275         * There are spaces in the string anywhere there was a break in the helix or when
276         * it goes from one helix to another helix in the structure. (the "step" is still returned)
277         * @return String of primary sequence with spaces between gaps and new helices.
278         */
279        public String getPairSequence() {
280                return pairSequence;
281        }
282
283
284        /**
285         * This method returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the
286         * list.  The first character is the leading strand base and the second character is the complementary base
287         * @return
288         */
289        public List<String> getPairingNames() {
290                return pairingNames;
291        }
292
293        public List<Matrix4d> getReferenceFrames() {
294                return referenceFrames;
295        }
296
297        /**
298         * This method is an internal test that the base pair specified is within a valid range.  If not, it throws an exception
299         * with a message.
300         * @param bp The index of the base pair or base-pair step to return.
301         */
302        private void checkArgument(int bp) {
303                if (bp < 0 || bp >= getPairingParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
304        }
305
306        /**
307         * This method returns the buckle in degrees for the given base pair
308         * @param bp the number of the base pair (starting with 0)
309         * @return the value as a double (in degrees)
310         */
311        public Double getBuckle(int bp) {
312                checkArgument(bp);
313                return pairingParameters[bp][0];
314        }
315
316        /**
317         * This method returns the propeller ("propeller-twist") in degrees for the given base pair
318         * @param bp the number of the base pair (starting with 0)
319         * @return the value as a double (in degrees)
320         */
321        public Double getPropeller(int bp) {
322                checkArgument(bp);
323                return pairingParameters[bp][1];
324        }
325
326        /**
327         * This method returns the opening in degrees for the given base pair
328         * @param bp the number of the base pair (starting with 0)
329         * @return the value as a double (in degrees)
330         */
331        public Double getOpening(int bp) {
332                checkArgument(bp);
333                return pairingParameters[bp][2];
334        }
335
336        /**
337         * This method returns the shear in Å for the given base pair
338         * @param bp the number of the base pair (starting with 0)
339         * @return the value as a double (in Å)
340         */
341        public Double getShear(int bp) {
342                checkArgument(bp);
343                return pairingParameters[bp][3];
344        }
345
346        /**
347         * This method returns the stretch in Å for the given base pair
348         * @param bp the number of the base pair (starting with 0)
349         * @return the value as a double (in Å)
350         */
351        public Double getStretch(int bp) {
352                checkArgument(bp);
353                return pairingParameters[bp][4];
354        }
355
356        /**
357         * This method returns the stagger in Å for the given base pair
358         * @param bp the number of the base pair (starting with 0)
359         * @return the value as a double (in Å)
360         */
361        public Double getStagger(int bp) {
362                checkArgument(bp);
363                return pairingParameters[bp][5];
364        }
365
366        /**
367         * This method returns the tilt for the given base pair, relative to the one before it.
368         * @param bp the number of the base pair (starting with 0)
369         * @return the value as a double (in degrees)
370         */
371        public Double getTilt(int bp) {
372                checkArgument(bp);
373                return stepParameters[bp][0];
374        }
375
376        /**
377         * This method returns the roll for the given base pair, relative to the one before it.
378         * @param bp the number of the base pair (starting with 0)
379         * @return the value as a double (in degrees)
380         */
381        public Double getRoll(int bp) {
382                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
383                return stepParameters[bp][1];
384        }
385
386        /**
387         * This method returns the twist for the given base pair, relative to the one before it.
388         * @param bp the number of the base pair (starting with 0)
389         * @return the value as a double (in degrees)
390         */
391        public Double getTwist(int bp) {
392                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
393                return stepParameters[bp][2];
394        }
395
396        /**
397         * Return the shift for the given base pair, relative to the one before it.
398         * @param bp the number of the base pair (starting with 0)
399         * @return the value as a double (in Å)
400         */
401        public Double getShift(int bp) {
402                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
403                return stepParameters[bp][3];
404        }
405
406        /**
407         * This method returns the slide for the given base pair, relative to the one before it.
408         * @param bp the number of the base pair (starting with 0)
409         * @return the value as a double (in Å)
410         */
411        public Double getSlide(int bp) {
412                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
413                return stepParameters[bp][4];
414        }
415
416        /**
417         * This method returns the rise for the given base pair, relative to the one before it.
418         * @param bp the number of the base pair (starting with 0)
419         * @return the value as a double (in Å)
420         */
421        public Double getRise(int bp) {
422                if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
423                return stepParameters[bp][5];
424        }
425
426
427        /**
428         * This method reports all the nucleic acid chains and has an option to remove duplicates if you
429         * are considering an analysis of only unique DNA or RNA helices in the Structure.
430         * @param removeDups If true, it will ignore duplicate chains
431         * @return A list of all the nucleic acid chains in order of the Structure
432         */
433        public List<Chain> getNucleicChains(boolean removeDups) {
434                if (structure == null) return new ArrayList<>();
435                List<Chain> chains = structure.getChains();
436                List<Chain> result = new ArrayList<>();
437                for (Chain c: chains) {
438                        if (c.isNucleicAcid()) {
439                                result.add(c);
440                        }
441                }
442                if (removeDups) for (int i = 0; i < result.size(); i++) {
443                        for (int j = i+2; j < result.size(); j++) {
444                                // remove duplicate sequences (structures with two or more identical units)
445                                if (result.get(i).getAtomSequence().equals(result.get(j).getAtomSequence())) {
446                                        result.remove(j);
447                                }
448                        }
449                }
450                return result;
451        }
452
453        /**
454         * This method performs a search for base pairs in the structure.  The criteria is alignment of
455         * sequences and the canonical base pairs of DNA or RNA. Use MismatchedBasePairParameters
456         * or TertiaryBasePairParameters for finding higher-order associations.
457         * @param chains The list of chains already found to be nucleic acids
458         * @return The list of corresponding Watson-Crick groups as pairs, as a Pair of nucleic acid Groups
459         */
460        public List<Pair<Group>> findPairs(List<Chain> chains) {
461                List<Pair<Group>> result = new ArrayList<>();
462                for (int i = 0; i < chains.size(); i++) {
463                        Chain c = chains.get(i);
464                        for (int j = i+1; j < chains.size(); j++) {
465                                String complement = complement(chains.get(j).getAtomSequence(), useRNA);
466                                String match = longestCommonSubstring(c.getAtomSequence(), complement);
467                                if (log.isDebugEnabled()) {
468                                        log.debug(c.getAtomSequence() + " " + chains.get(j).getAtomSequence() + " " + match);
469                                }
470                                int index1 = c.getAtomSequence().indexOf(match);
471                                int index2 = complement.length() - complement.indexOf(match) - 1;
472                                for (int k = 0; k < match.length(); k++) {
473                                        Group g1 = c.getAtomGroup(index1+k);
474                                        Group g2 = chains.get(j).getAtomGroup(index2-k);
475                                        Integer type1 = BASE_MAP.get(g1.getPDBName());
476                                        Integer type2 = BASE_MAP.get(g2.getPDBName());
477                                        if (type1 == null || type2 == null) {
478                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
479                                                continue;
480                                        }
481                                        Atom a1 = g1.getAtom(RING_MAP.get(type1).get(0));
482                                        Atom a2 = g2.getAtom(RING_MAP.get(type2).get(0));
483
484                                        if (a1 == null) {
485                                                log.info("Error processing " + g1.getPDBName());
486                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
487                                                continue;
488                                        }
489                                        if (a2 == null) {
490                                                log.info("Error processing " + g2.getPDBName());
491                                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
492                                                continue;
493                                        }
494
495                                        double dx = a1.getX()-a2.getX();
496                                        double dy = a1.getY()-a2.getY();
497                                        double dz = a1.getZ()-a2.getZ();
498                                        double distance = Math.sqrt(dx*dx+dy*dy+dz*dz);
499                                        //log.info("C8-C6 Distance (Å): " + distance);
500                                        // could be a base pair
501                                        if (Math.abs(distance-10.0) < 4.0) {
502                                                boolean valid = true;
503                                                for (String atomname : RING_MAP.get(type1)) {
504                                                        Atom a = g1.getAtom(atomname);
505                                                        if (a == null) valid = false;
506                                                }
507                                                if (valid) for (String atomname: RING_MAP.get(type2)) {
508                                                        Atom a = g2.getAtom(atomname);
509                                                        if (a == null) valid = false;
510                                                }
511                                                if (valid) {
512                                                        result.add(new Pair<Group>(g1, g2));
513                                                        pairingNames.add((useRNA ? BASE_LIST_RNA[type1]+ BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1]+ BASE_LIST_DNA[type2]));
514                                                        pairSequence += c.getAtomSequence().charAt(index1 + k);
515                                                } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
516                                        } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
517                                }
518                                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
519                        }
520                        //log.info();
521                }
522                log.info("Matched: " + pairSequence);
523                return result;
524        }
525
526
527        /**
528         * This method calculates the central frame (4x4 transformation matrix) of a single base pair.
529         * @param pair An array of the two groups that make a hypothetical pair
530         * @return The middle frame of the center of the base-pair formed
531         */
532        public Matrix4d basePairReferenceFrame(Pair<Group> pair) {
533                Integer type1 = BASE_MAP.get(pair.getFirst().getPDBName());
534                Integer type2 = BASE_MAP.get(pair.getSecond().getPDBName());
535                SuperPosition sp = new SuperPositionQCP(true);
536                if (type1 == null || type2 == null) return null;
537                PDBFileReader pdbFileReader = new PDBFileReader();
538                Structure s1, s2;
539                try {
540                        s1 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type1].getBytes()));
541                        s2 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type2].getBytes()));
542                } catch (IOException e) {
543                        e.printStackTrace();
544                        return null;
545                }
546                Group std1 = s1.getChain("A").getAtomGroup(0);
547                Group std2 = s2.getChain("A").getAtomGroup(0);
548
549                Point3d[] pointref = new Point3d[std1.getAtoms().size()];
550                Point3d[] pointact = new Point3d[std1.getAtoms().size()];
551                int count = 0;
552
553                for (Atom a : std1.getAtoms()) {
554                        if (pair.getFirst().getAtom(a.getName()) == null) return null;
555                        pointref[count] = a.getCoordsAsPoint3d();
556                        pointact[count] = pair.getFirst().getAtom(a.getName()).getCoordsAsPoint3d();
557                        count++;
558                }
559                assert count == std1.getAtoms().size();
560                Matrix4d ref1 = (Matrix4d)sp.superposeAndTransform(pointact, pointref).clone();
561
562                pointref = new Point3d[std2.getAtoms().size()];
563                pointact = new Point3d[std2.getAtoms().size()];
564
565                count = 0;
566                for (Atom a : std2.getAtoms()) {
567                        if (pair.getSecond().getAtom(a.getName()) == null) return null;
568                        pointref[count] = a.getCoordsAsPoint3d();
569                        pointact[count] = pair.getSecond().getAtom(a.getName()).getCoordsAsPoint3d();
570                        count++;
571                }
572                assert count == std2.getAtoms().size();
573
574                Matrix4d temp = (Matrix4d)ref1.clone();
575                Matrix4d temp2 = (Matrix4d)temp.clone();
576                Matrix4d ref2 = sp.superposeAndTransform(pointact, pointref);
577
578                double[][] v = new double[3][4];
579                double[] y3 = new double[4];
580                double[] z3 = new double[4];
581                ref2.getColumn(1, y3);
582                ref2.getColumn(2, z3);
583                double[] z31 = new double[4];
584                ref1.getColumn(2, z31);
585                if (z3[0]*z31[0]+z3[1]*z31[1]+z3[2]*z31[2] < 0.0) {
586                        for (int i = 0; i < 3; i++) {
587                                y3[i] *= -1.0;
588                                z3[i] *= -1.0;
589                        }
590                }
591                ref2.setColumn(1, y3);
592                ref2.setColumn(2, z3);
593
594                temp.add(ref2);
595                temp.mul(0.5);
596                double[] x3 = new double[4];
597                temp.getColumn(0, x3);
598                temp.getColumn(1, y3);
599                temp.getColumn(2, z3);
600                x3 = removeComponent(x3, z3);
601                x3 = removeComponent(x3, y3);
602                y3 = removeComponent(y3, z3);
603                temp.setColumn(0, x3);
604                temp.setColumn(1, y3);
605                temp.setColumn(2, z3);
606
607                // normalize the short, long, and normal axes
608                for (int i = 0; i < 3; i++) {
609                        temp.getColumn(i, v[i]);
610                        double r = Math.sqrt(v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]);
611                        for (int j = 0; j < 3; j++) {
612                                v[i][j] /= r;
613                        }
614                        temp.setColumn(i, v[i]);
615                }
616
617                // calculate pairing parameters: buckle, propeller, opening, shear, stretch, stagger
618                temp2.invert();
619                temp2.mul(ref2);
620                pairParameters = calculateTp(temp2);
621                for (int i = 0; i < 6; i++) pairParameters[i] *= -1;
622
623                // return the central frame of the base pair
624                return temp;
625
626        }
627
628
629        @Override
630        public String toString() {
631                if (getPairingParameters() == null) return "No data";
632                StringBuilder result = new StringBuilder(10000);
633                result.append(pairingParameters.length + " base pairs\n");
634                result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n");
635                for (int i = 0; i < pairingParameters.length; i++) {
636                        result.append(pairingNames.get(i)+": ");
637                        for (int j = 0; j < 6; j++)
638                                result.append(String.format("%5.4f", pairingParameters[i][j]) + " ");
639                        for (int j = 0; j < 6; j++)
640                                result.append(String.format("%5.4f", stepParameters[i][j]) + " ");
641                        result.append("\n");
642                }
643                return result.toString();
644        }
645
646
647        // The following methods are just helper classes for the rapid analyze of base-pair geometry.
648        /**
649         * This method calculates pairing and step parameters from 4x4 transformation matrices (used internally)
650         * that comes out as a Matrix4d.
651         * @param input the 4x4 matrix representing the transformation from strand II -> strand I or pair i to pair i+1
652         * @return Six parameters as double[6]
653         */
654        public static double[] calculateTp(Matrix4d input) {
655
656                double[][] A = new double[4][4];
657                for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) {
658                        A[i][j] = input.getElement(i, j);
659                }
660                double[] M = new double[6];
661
662                double cosgamma, gamma, phi, omega, sgcp, omega2_minus_phi,
663                                sm, cm, sp, cp, sg, cg;
664
665                cosgamma = A[2][2];
666                if (cosgamma > 1.0) cosgamma = 1.0;
667                else if (cosgamma < -1.0) cosgamma = -1.0;
668
669                gamma = acos(cosgamma);
670
671                sgcp = A[1][1]*A[0][2]-A[0][1]*A[1][2];
672
673                if (gamma == 0.0) omega = -atan2(A[0][1],A[1][1]);
674                else omega = atan2(A[2][1]*A[0][2]+sgcp*A[1][2],sgcp*A[0][2]-A[2][1]*A[1][2]);
675
676                omega2_minus_phi = atan2(A[1][2],A[0][2]);
677
678                phi = omega/2.0 - omega2_minus_phi;
679
680                M[0] = gamma*sin(phi)*180.0/PI;
681                M[1] = gamma*cos(phi)*180.0/PI;
682                M[2] = omega*180.0/PI;
683
684                sm = sin(omega/2.0-phi);
685                cm = cos(omega/2.0-phi);
686                sp = sin(phi);
687                cp = cos(phi);
688                sg = sin(gamma/2.0);
689                cg = cos(gamma/2.0);
690
691                M[3] = (cm*cg*cp-sm*sp)*A[0][3]+(sm*cg*cp+cm*sp)*A[1][3]-sg*cp*A[2][3];
692                M[4] = (-cm*cg*sp-sm*cp)*A[0][3]+(-sm*cg*sp+cm*cp)*A[1][3]+sg*sp*A[2][3];
693                M[5] = (cm*sg)*A[0][3]+(sm*sg)*A[1][3]+cg*A[2][3];
694
695                return M;
696
697        }
698
699        /**
700         * This method returns the complement of a base. (used internally)
701         * @param base The letter of the base
702         * @param RNA Whether it is RNA (if false, it is DNA)
703         * @return The character representing the complement of the base
704         */
705        protected static char complementBase(char base, boolean RNA) {
706                if (base == 'A' && RNA) return 'U';
707                if (base == 'A') return 'T';
708                if (base == 'T' && !RNA) return 'A';
709                if (base == 'U' && RNA) return 'A';
710                if (base == 'C') return 'G';
711                if (base == 'G') return 'C';
712                return ' ';
713        }
714
715        /**
716         * Simple helper method for quickly checking the complement of a sequence, see also DNASequence nad RNASequence classes
717         * for more extensively useful functions not used in this narrow context of structural biology of base pairs.  (Used internally)
718         */
719        private static String complement(String sequence, boolean RNA) {
720                String result = "";
721                for (int i = sequence.length() - 1; i >= 0; i--) {
722                        result += complementBase(sequence.charAt(i), RNA);
723                }
724                return result;
725        }
726
727        /**
728         * This does a 3D Vector cross product of two vectors as double arrays. (used internally)
729         *
730         * @param a An array of length 3 or 4 (4th component is ignored)
731         * @param b An array of length 3 or 4 (4th component is ignored)
732         * @return The cross product of the vectors (just the first three components
733         */
734        @SuppressWarnings("unused")
735        private static double[] cross(double[] a, double[] b) {
736                assert a.length >= 3 && b.length >= 3;
737                double[] result = new double[4];
738                result[0] = a[1]*b[2]-a[2]*b[1];
739                result[1] = a[2]*b[0]-a[0]*b[2];
740                result[2] = a[0]*b[1]-a[1]*b[0];
741                return result;
742        }
743
744        /**
745         * This method removes any component of vector a that is along vector b. (used internally)
746         * @param a The array (vector) to remove component from
747         * @param b The component array (vector) to remove from the first
748         * @return The original array a with any component along b removed from it.
749         */
750        private static double[] removeComponent(double[] a, double[] b) {
751                double dot = 0;
752                double[] result = new double[4];
753                for (int i = 0; i < 3; i++) {
754                        dot += a[i]*b[i];
755                }
756                for (int i = 0; i < 3; i++) {
757                        result[i] = a[i]-dot*b[i];
758                }
759                return result;
760
761        }
762
763        /**
764         * This method finds the longest common substring between two strings. (used internally)
765         * @param s1 The first string
766         * @param s2 The second string
767         * @return The substring itself
768         */
769        private static String longestCommonSubstring(String s1, String s2) {
770                int start = 0;
771                int max = 0;
772                for (int i = 0; i < s1.length(); i++) {
773                        for (int j = 0; j < s2.length(); j++) {
774                                int x = 0;
775                                while (s1.charAt(i + x) == s2.charAt(j + x)) {
776                                        x++;
777                                        if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break;
778                                }
779                                if (x > max) {
780                                        max = x;
781                                        start = i;
782                                }
783                        }
784                }
785                return s1.substring(start, (start + max));
786        }
787
788        /**
789         * This returns true if a is the complement of b, false otherwise. (used internally)
790         * @param a First letter
791         * @param b Potential matching letter
792         * @param RNA Whether it is RNA (if false, DNA rules are used)
793         * @return True if the bases are complementary.
794         */
795        protected static boolean match(char a, char b, boolean RNA) {
796                if (a == 'A' && b == 'T' && !RNA) return true;
797                if (a == 'A' && b == 'U' && RNA) return true;
798                if (a == 'T' && b == 'A' && !RNA) return true;
799                if (a == 'U' && b == 'A' && RNA) return true;
800                if (a == 'G' && b == 'C') return true;
801                if (a == 'C' && b == 'G') return true;
802                return false;
803        }
804}