001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 *
021 */
022package org.biojava.nbio.structure.basepairs;
023
024import org.biojava.nbio.structure.Structure;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Group;
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.contact.Pair;
029import org.biojava.nbio.structure.geometry.SuperPosition;
030import org.biojava.nbio.structure.geometry.SuperPositionQCP;
031import org.biojava.nbio.structure.io.PDBFileReader;
032
033import javax.vecmath.Matrix4d;
034import javax.vecmath.Point3d;
035import java.io.ByteArrayInputStream;
036import java.io.IOException;
037import java.io.Serializable;
038import java.util.List;
039import java.util.Arrays;
040import java.util.ArrayList;
041import java.util.Map;
042import java.util.HashMap;
043
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import static java.lang.Math.sin;
048import static java.lang.Math.cos;
049import static java.lang.Math.atan2;
050import static java.lang.Math.acos;
051import static java.lang.Math.PI;
052
053/**
054 * This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters for any nucleic
055 * acid containing structure that has the information about the core base-pair rings.
056 * Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858
057 *
058 * The method that is usually overridden is findPairs(), this base implementation is used for a large-scale
059 * analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost
060 * 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017).
061 * Those who study tertiary structures for RNA folding should use the TertiaryBasePairParameters,
062 * because this base class is only looking for base pairs between separate strands that exactly line up.
063 * To relax the lining up policy and allow for non-canonical base pairs, use the MismatchedBasePairParameters
064 * class, which will not consider intra-strand base pairing.
065 *
066 * @author Luke Czapla
067 * @since 5.0.0
068 *
069 */
070public class BasePairParameters implements Serializable {
071
072    private static final long serialVersionUID = 6214502385L;
073    private static Logger log = LoggerFactory.getLogger(BasePairParameters.class);
074
075    // See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html
076    // and the paper cited at the top of this class (also as Table 1).
077    // These are hard-coded to avoid problems with resource paths.
078    public static final String[] STANDARD_BASES = new String[] {
079            "SEQRES   1 A    1  A\n" +
080                    "ATOM      2  N9    A A   1      -1.291   4.498   0.000\n" +
081                    "ATOM      3  C8    A A   1       0.024   4.897   0.000\n" +
082                    "ATOM      4  N7    A A   1       0.877   3.902   0.000\n" +
083                    "ATOM      5  C5    A A   1       0.071   2.771   0.000\n" +
084                    "ATOM      6  C6    A A   1       0.369   1.398   0.000\n" +
085                    "ATOM      8  N1    A A   1      -0.668   0.532   0.000\n" +
086                    "ATOM      9  C2    A A   1      -1.912   1.023   0.000\n" +
087                    "ATOM     10  N3    A A   1      -2.320   2.290   0.000\n" +
088                    "ATOM     11  C4    A A   1      -1.267   3.124   0.000\n" +
089                    "END",
090            "SEQRES   1 A    1  G\n" +
091                    "ATOM      2  N9    G A   1      -1.289   4.551   0.000\n" +
092                    "ATOM      3  C8    G A   1       0.023   4.962   0.000\n" +
093                    "ATOM      4  N7    G A   1       0.870   3.969   0.000\n" +
094                    "ATOM      5  C5    G A   1       0.071   2.833   0.000\n" +
095                    "ATOM      6  C6    G A   1       0.424   1.460   0.000\n" +
096                    "ATOM      8  N1    G A   1      -0.700   0.641   0.000\n" +
097                    "ATOM      9  C2    G A   1      -1.999   1.087   0.000\n" +
098                    "ATOM     11  N3    G A   1      -2.342   2.364   0.001\n" +
099                    "ATOM     12  C4    G A   1      -1.265   3.177   0.000\n" +
100                    "END",
101            "SEQRES   1 A    1  T\n" +
102                    "ATOM      2  N1    T A   1      -1.284   4.500   0.000\n" +
103                    "ATOM      3  C2    T A   1      -1.462   3.135   0.000\n" +
104                    "ATOM      5  N3    T A   1      -0.298   2.407   0.000\n" +
105                    "ATOM      6  C4    T A   1       0.994   2.897   0.000\n" +
106                    "ATOM      8  C5    T A   1       1.106   4.338   0.000\n" +
107                    "ATOM     10  C6    T A   1      -0.024   5.057   0.000\n" +
108                    "END",
109            "SEQRES   1 A    1  C\n" +
110                    "ATOM      2  N1    C A   1      -1.285   4.542   0.000\n" +
111                    "ATOM      3  C2    C A   1      -1.472   3.158   0.000\n" +
112                    "ATOM      5  N3    C A   1      -0.391   2.344   0.000\n" +
113                    "ATOM      6  C4    C A   1       0.837   2.868   0.000\n" +
114                    "ATOM      8  C5    C A   1       1.056   4.275   0.000\n" +
115                    "ATOM      9  C6    C A   1      -0.023   5.068   0.000\n" +
116                    "END",
117            "SEQRES   1 A    1  U\n" +
118                    "ATOM      2  N1    U A   1      -1.284   4.500   0.000\n" +
119                    "ATOM      3  C2    U A   1      -1.462   3.131   0.000\n" +
120                    "ATOM      5  N3    U A   1      -0.302   2.397   0.000\n" +
121                    "ATOM      6  C4    U A   1       0.989   2.884   0.000\n" +
122                    "ATOM      8  C5    U A   1       1.089   4.311   0.000\n" +
123                    "ATOM      9  C6    U A   1      -0.024   5.053   0.000\n"
124    };
125
126    // this is also hard-coded data about standard WC base pairs for both DNA and RNA
127    protected static final String[] BASE_LIST_DNA = {"A", "G", "T", "C"};
128    protected static final String[] BASE_LIST_RNA = {"A", "G", "U", "C"};
129    protected static final Map<String, Integer> BASE_MAP;
130   // private static List<String> RNAspecific = Arrays.asList("U", "URA"),
131   //        DNAspecific = Arrays.asList("DC", "C", "CYT");
132    protected static final Map<Integer, List<String>> RING_MAP;
133    static {
134        BASE_MAP = new HashMap<>();
135        BASE_MAP.put("DA", 0); BASE_MAP.put("ADE", 0); BASE_MAP.put("A", 0);
136        BASE_MAP.put("DG", 1); BASE_MAP.put("GUA", 1); BASE_MAP.put("G", 1);
137        BASE_MAP.put("DT", 2); BASE_MAP.put("THY", 2); BASE_MAP.put("T", 2); BASE_MAP.put("U", 2); BASE_MAP.put("URA", 2);
138        BASE_MAP.put("DC", 3); BASE_MAP.put("CYT", 3); BASE_MAP.put("C", 3);
139
140        RING_MAP = new HashMap<>();
141        RING_MAP.put(0, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
142        RING_MAP.put(1, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9"));
143        RING_MAP.put(2, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
144        RING_MAP.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1"));
145   }
146
147    protected Structure structure;
148    protected boolean canonical = true;
149    protected boolean useRNA = false;
150    protected boolean nonredundant = false;
151    protected double[] pairParameters;
152
153    // this is the main data that the user wants to get back out from the procedure.
154    protected String pairSequence = "";
155    protected double[][] pairingParameters;
156    protected double[][] stepParameters;
157    protected List<String> pairingNames = new ArrayList<>();
158    protected List<Matrix4d> referenceFrames = new ArrayList<>();
159
160
161    /**
162     * This constructor takes a Structure object, finds base pair and base-pair step parameters
163     * for double-helical regions within the structure.
164     * @param structure The already-loaded structure to analyze.
165     * @param useRNA whether to look for canonical RNA pairs.  By default (false) it analyzes DNA.
166     * @param removeDups whether to only look for base-pair parameters for each unique sequence in
167     *  the structure (if set to <i>true</i>)
168     * @param canonical Whether to consider only Watson-Crick base pairs
169     */
170    public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) {
171        this.structure = structure;
172        this.useRNA = useRNA;
173        this.canonical = canonical;
174        this.nonredundant = removeDups;
175
176    }
177
178    /**
179     * This constructor takes a Structure object, whether to use RNA, and whether to remove duplicate sequences.
180     * @param structure The already-loaded structure to analyze.
181     * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
182     * @param removeDups if true, duplicate sequences will not be considered.  This is for the analysis of X-ray structures from
183     *                   RCSB, where there may be identical or similar units.
184     */
185    public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) {
186        this(structure, useRNA, removeDups, false);
187    }
188
189    /**
190     * This constructor takes a Structure object, and whether to use the RNA standard bases.
191     * @param structure The already-loaded structure to analyze.
192     * @param useRNA if true, the RNA standard bases will be used.  Otherwise, if false, it will work on standard DNA bases.
193     */
194    public BasePairParameters(Structure structure, boolean useRNA) {
195        this(structure, useRNA, false, false);
196    }
197
198    /**
199     * This constructor takes a Structure object, finds base pair and base-pair step parameters
200     * for double-helical regions within the structure for only canonical DNA pairs.
201     * @param structure The already-loaded structure to analyze.
202     */
203    public BasePairParameters(Structure structure) {
204        this(structure, false, false, true);
205    }
206
207
208    /**
209     * This method is the main function call to extract all step parameters, pairing parameters, and sequence
210     * information from the Structure object provided to the constructor.
211     * @return This same object with the populated data, convenient for output
212     *  (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>)
213     */
214    public BasePairParameters analyze() {
215        if (structure == null) {
216            pairingParameters = null;
217            stepParameters = null;
218            return this;
219        }
220        List<Chain> nucleics = this.getNucleicChains(nonredundant);
221        List<Pair<Group>> pairs = this.findPairs(nucleics);
222        this.pairingParameters = new double[pairs.size()][6];
223        this.stepParameters = new double[pairs.size()][6];
224        Matrix4d lastStep;
225        Matrix4d currentStep = null;
226        for (int i = 0; i < pairs.size(); i++) {
227            lastStep = currentStep;
228            currentStep = this.basePairReferenceFrame(pairs.get(i));
229            referenceFrames.add((Matrix4d)currentStep.clone());
230            for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j];
231            if (i != 0) {
232                lastStep.invert();
233                lastStep.mul(currentStep);
234                double[] sparms = calculateTp(lastStep);
235                for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j];
236            }
237        }
238        return this;
239    }
240
241
242
243    /**
244     * This method returns the total number of base pairs that were found, used after the call to analyze().
245     * @return An integer value, number of base pairs
246     */
247    public int getLength() {
248        if (structure == null || pairParameters == null) throw new IllegalArgumentException("This structure is not analyzed or not initialized.");
249        return pairingParameters.length;
250    }
251
252
253    /**
254     * This method reports all the pair parameters, in the order of:
255     * buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å).
256     * @return A double[][] with length equal to number of base pairs for rows, and 6 columns
257     */
258    public double[][] getPairingParameters() {
259        return pairingParameters;
260    }
261
262    /**
263     * This method reports all the base-pair step parameters, in the order of:
264     * tilt, roll, twist (in degrees), shift, slide, rise (in Å).
265     * @return A double[][] with length equal to number of base pairs (the first row 0 has no step
266     *  and therefore is six zeroes), and 6 columns.
267     */
268    public double[][] getStepParameters() {
269        return stepParameters;
270    }
271
272
273    /**
274     * This method returns the primary strand's sequence where parameters were found.
275     * There are spaces in the string anywhere there was a break in the helix or when
276     * it goes from one helix to another helix in the structure. (the "step" is still returned)
277     * @return String of primary sequence with spaces between gaps and new helices.
278     */
279    public String getPairSequence() {
280        return pairSequence;
281    }
282
283
284    /**
285     * This method returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the
286     * list.  The first character is the leading strand base and the second character is the complementary base
287     * @return
288     */
289    public List<String> getPairingNames() {
290        return pairingNames;
291    }
292
293    public List<Matrix4d> getReferenceFrames() {
294        return referenceFrames;
295    }
296
297    /**
298     * This method is an internal test that the base pair specified is within a valid range.  If not, it throws an exception
299     * with a message.
300     * @param bp The index of the base pair or base-pair step to return.
301     */
302    private void checkArgument(int bp) {
303        if (bp < 0 || bp >= getPairingParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
304    }
305
306    /**
307     * This method returns the buckle in degrees for the given base pair
308     * @param bp the number of the base pair (starting with 0)
309     * @return the value as a double (in degrees)
310     */
311    public Double getBuckle(int bp) {
312        checkArgument(bp);
313        return pairingParameters[bp][0];
314    }
315
316    /**
317     * This method returns the propeller ("propeller-twist") in degrees for the given base pair
318     * @param bp the number of the base pair (starting with 0)
319     * @return the value as a double (in degrees)
320     */
321    public Double getPropeller(int bp) {
322        checkArgument(bp);
323        return pairingParameters[bp][1];
324    }
325
326    /**
327     * This method returns the opening in degrees for the given base pair
328     * @param bp the number of the base pair (starting with 0)
329     * @return the value as a double (in degrees)
330     */
331    public Double getOpening(int bp) {
332        checkArgument(bp);
333        return pairingParameters[bp][2];
334    }
335
336    /**
337     * This method returns the shear in Å for the given base pair
338     * @param bp the number of the base pair (starting with 0)
339     * @return the value as a double (in Å)
340     */
341    public Double getShear(int bp) {
342        checkArgument(bp);
343        return pairingParameters[bp][3];
344    }
345
346    /**
347     * This method returns the stretch in Å for the given base pair
348     * @param bp the number of the base pair (starting with 0)
349     * @return the value as a double (in Å)
350     */
351    public Double getStretch(int bp) {
352        checkArgument(bp);
353        return pairingParameters[bp][4];
354    }
355
356    /**
357     * This method returns the stagger in Å for the given base pair
358     * @param bp the number of the base pair (starting with 0)
359     * @return the value as a double (in Å)
360     */
361    public Double getStagger(int bp) {
362        checkArgument(bp);
363        return pairingParameters[bp][5];
364    }
365
366    /**
367     * This method returns the tilt for the given base pair, relative to the one before it.
368     * @param bp the number of the base pair (starting with 0)
369     * @return the value as a double (in degrees)
370     */
371    public Double getTilt(int bp) {
372        checkArgument(bp);
373        return stepParameters[bp][0];
374    }
375
376    /**
377     * This method returns the roll for the given base pair, relative to the one before it.
378     * @param bp the number of the base pair (starting with 0)
379     * @return the value as a double (in degrees)
380     */
381    public Double getRoll(int bp) {
382        if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
383        return stepParameters[bp][1];
384    }
385
386    /**
387     * This method returns the twist for the given base pair, relative to the one before it.
388     * @param bp the number of the base pair (starting with 0)
389     * @return the value as a double (in degrees)
390     */
391    public Double getTwist(int bp) {
392        if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
393        return stepParameters[bp][2];
394    }
395
396    /**
397     * Return the shift for the given base pair, relative to the one before it.
398     * @param bp the number of the base pair (starting with 0)
399     * @return the value as a double (in Å)
400     */
401    public Double getShift(int bp) {
402        if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
403        return stepParameters[bp][3];
404    }
405
406    /**
407     * This method returns the slide for the given base pair, relative to the one before it.
408     * @param bp the number of the base pair (starting with 0)
409     * @return the value as a double (in Å)
410     */
411    public Double getSlide(int bp) {
412        if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
413        return stepParameters[bp][4];
414    }
415
416    /**
417     * This method returns the rise for the given base pair, relative to the one before it.
418     * @param bp the number of the base pair (starting with 0)
419     * @return the value as a double (in Å)
420     */
421    public Double getRise(int bp) {
422        if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range.");
423        return stepParameters[bp][5];
424    }
425
426
427    /**
428     * This method reports all the nucleic acid chains and has an option to remove duplicates if you
429     * are considering an analysis of only unique DNA or RNA helices in the Structure.
430     * @param removeDups If true, it will ignore duplicate chains
431     * @return A list of all the nucleic acid chains in order of the Structure
432     */
433    public List<Chain> getNucleicChains(boolean removeDups) {
434        if (structure == null) return new ArrayList<>();
435        List<Chain> chains = structure.getChains();
436        List<Chain> result = new ArrayList<>();
437        for (Chain c: chains) {
438            if (c.isNucleicAcid()) {
439                result.add(c);
440            }
441        }
442        if (removeDups) for (int i = 0; i < result.size(); i++) {
443            for (int j = i+2; j < result.size(); j++) {
444                // remove duplicate sequences (structures with two or more identical units)
445                if (result.get(i).getAtomSequence().equals(result.get(j).getAtomSequence())) {
446                    result.remove(j);
447                }
448            }
449        }
450        return result;
451    }
452
453    /**
454     * This method performs a search for base pairs in the structure.  The criteria is alignment of
455     * sequences and the canonical base pairs of DNA or RNA. Use MismatchedBasePairParameters
456     * or TertiaryBasePairParameters for finding higher-order associations.
457     * @param chains The list of chains already found to be nucleic acids
458     * @return The list of corresponding Watson-Crick groups as pairs, as a Pair of nucleic acid Groups
459     */
460    public List<Pair<Group>> findPairs(List<Chain> chains) {
461        List<Pair<Group>> result = new ArrayList<>();
462        for (int i = 0; i < chains.size(); i++) {
463            Chain c = chains.get(i);
464            for (int j = i+1; j < chains.size(); j++) {
465                String complement = complement(chains.get(j).getAtomSequence(), useRNA);
466                String match = longestCommonSubstring(c.getAtomSequence(), complement);
467                if (log.isDebugEnabled()) {
468                    log.debug(c.getAtomSequence() + " " + chains.get(j).getAtomSequence() + " " + match);
469                }
470                int index1 = c.getAtomSequence().indexOf(match);
471                int index2 = complement.length() - complement.indexOf(match) - 1;
472                for (int k = 0; k < match.length(); k++) {
473                    Group g1 = c.getAtomGroup(index1+k);
474                    Group g2 = chains.get(j).getAtomGroup(index2-k);
475                    Integer type1 = BASE_MAP.get(g1.getPDBName());
476                    Integer type2 = BASE_MAP.get(g2.getPDBName());
477                    if (type1 == null || type2 == null) {
478                        if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
479                        continue;
480                    }
481                    Atom a1 = g1.getAtom(RING_MAP.get(type1).get(0));
482                    Atom a2 = g2.getAtom(RING_MAP.get(type2).get(0));
483
484                    if (a1 == null) {
485                        log.info("Error processing " + g1.getPDBName());
486                        if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
487                        continue;
488                    }
489                    if (a2 == null) {
490                        log.info("Error processing " + g2.getPDBName());
491                        if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
492                        continue;
493                    }
494
495                    double dx = a1.getX()-a2.getX();
496                    double dy = a1.getY()-a2.getY();
497                    double dz = a1.getZ()-a2.getZ();
498                    double distance = Math.sqrt(dx*dx+dy*dy+dz*dz);
499                    //log.info("C8-C6 Distance (Å): " + distance);
500                    // could be a base pair
501                    if (Math.abs(distance-10.0) < 4.0) {
502                        boolean valid = true;
503                        for (String atomname : RING_MAP.get(type1)) {
504                            Atom a = g1.getAtom(atomname);
505                            if (a == null) valid = false;
506                        }
507                        if (valid) for (String atomname: RING_MAP.get(type2)) {
508                            Atom a = g2.getAtom(atomname);
509                            if (a == null) valid = false;
510                        }
511                        if (valid) {
512                            result.add(new Pair<Group>(g1, g2));
513                            pairingNames.add((useRNA ? BASE_LIST_RNA[type1]+ BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1]+ BASE_LIST_DNA[type2]));
514                            pairSequence += c.getAtomSequence().charAt(index1 + k);
515                        } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
516                    } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
517                }
518                if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' ';
519            }
520            //log.info();
521        }
522        log.info("Matched: " + pairSequence);
523        return result;
524    }
525
526
527    /**
528     * This method calculates the central frame (4x4 transformation matrix) of a single base pair.
529     * @param pair An array of the two groups that make a hypothetical pair
530     * @return The middle frame of the center of the base-pair formed
531     */
532    public Matrix4d basePairReferenceFrame(Pair<Group> pair) {
533        Integer type1 = BASE_MAP.get(pair.getFirst().getPDBName());
534        Integer type2 = BASE_MAP.get(pair.getSecond().getPDBName());
535        SuperPosition sp = new SuperPositionQCP(true);
536        if (type1 == null || type2 == null) return null;
537        PDBFileReader pdbFileReader = new PDBFileReader();
538        Structure s1, s2;
539        try {
540            s1 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type1].getBytes()));
541            s2 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type2].getBytes()));
542        } catch (IOException e) {
543            e.printStackTrace();
544            return null;
545        }
546        Group std1 = s1.getChain("A").getAtomGroup(0);
547        Group std2 = s2.getChain("A").getAtomGroup(0);
548
549        Point3d[] pointref = new Point3d[std1.getAtoms().size()];
550        Point3d[] pointact = new Point3d[std1.getAtoms().size()];
551        int count = 0;
552
553        for (Atom a : std1.getAtoms()) {
554            if (pair.getFirst().getAtom(a.getName()) == null) return null;
555            pointref[count] = a.getCoordsAsPoint3d();
556            pointact[count] = pair.getFirst().getAtom(a.getName()).getCoordsAsPoint3d();
557            count++;
558        }
559        assert count == std1.getAtoms().size();
560        Matrix4d ref1 = (Matrix4d)sp.superposeAndTransform(pointact, pointref).clone();
561
562        pointref = new Point3d[std2.getAtoms().size()];
563        pointact = new Point3d[std2.getAtoms().size()];
564
565        count = 0;
566        for (Atom a : std2.getAtoms()) {
567            if (pair.getSecond().getAtom(a.getName()) == null) return null;
568            pointref[count] = a.getCoordsAsPoint3d();
569            pointact[count] = pair.getSecond().getAtom(a.getName()).getCoordsAsPoint3d();
570            count++;
571        }
572        assert count == std2.getAtoms().size();
573
574        Matrix4d temp = (Matrix4d)ref1.clone();
575        Matrix4d temp2 = (Matrix4d)temp.clone();
576        Matrix4d ref2 = sp.superposeAndTransform(pointact, pointref);
577
578        double[][] v = new double[3][4];
579        double[] y3 = new double[4];
580        double[] z3 = new double[4];
581        ref2.getColumn(1, y3);
582        ref2.getColumn(2, z3);
583        double[] z31 = new double[4];
584        ref1.getColumn(2, z31);
585        if (z3[0]*z31[0]+z3[1]*z31[1]+z3[2]*z31[2] < 0.0) {
586            for (int i = 0; i < 3; i++) {
587                y3[i] *= -1.0;
588                z3[i] *= -1.0;
589            }
590        }
591        ref2.setColumn(1, y3);
592        ref2.setColumn(2, z3);
593
594        temp.add(ref2);
595        temp.mul(0.5);
596        double[] x3 = new double[4];
597        temp.getColumn(0, x3);
598        temp.getColumn(1, y3);
599        temp.getColumn(2, z3);
600        x3 = removeComponent(x3, z3);
601        x3 = removeComponent(x3, y3);
602        y3 = removeComponent(y3, z3);
603        temp.setColumn(0, x3);
604        temp.setColumn(1, y3);
605        temp.setColumn(2, z3);
606
607        // normalize the short, long, and normal axes
608        for (int i = 0; i < 3; i++) {
609            temp.getColumn(i, v[i]);
610            double r = Math.sqrt(v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]);
611            for (int j = 0; j < 3; j++) {
612                v[i][j] /= r;
613            }
614            temp.setColumn(i, v[i]);
615        }
616
617        // calculate pairing parameters: buckle, propeller, opening, shear, stretch, stagger
618        temp2.invert();
619        temp2.mul(ref2);
620        pairParameters = calculateTp(temp2);
621        for (int i = 0; i < 6; i++) pairParameters[i] *= -1;
622
623        // return the central frame of the base pair
624        return temp;
625
626    }
627
628
629    @Override
630    public String toString() {
631        if (getPairingParameters() == null) return "No data";
632        StringBuilder result = new StringBuilder(10000);
633        result.append(pairingParameters.length + " base pairs\n");
634        result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n");
635        for (int i = 0; i < pairingParameters.length; i++) {
636            result.append(pairingNames.get(i)+": ");
637            for (int j = 0; j < 6; j++)
638                result.append(String.format("%5.4f", pairingParameters[i][j]) + " ");
639            for (int j = 0; j < 6; j++)
640                result.append(String.format("%5.4f", stepParameters[i][j]) + " ");
641            result.append("\n");
642        }
643        return result.toString();
644    }
645
646
647    // The following methods are just helper classes for the rapid analyze of base-pair geometry.
648    /**
649     * This method calculates pairing and step parameters from 4x4 transformation matrices (used internally)
650     * that comes out as a Matrix4d.
651     * @param input the 4x4 matrix representing the transformation from strand II -> strand I or pair i to pair i+1
652     * @return Six parameters as double[6]
653     */
654    public static double[] calculateTp(Matrix4d input) {
655
656        double[][] A = new double[4][4];
657        for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) {
658            A[i][j] = input.getElement(i, j);
659        }
660        double[] M = new double[6];
661
662        double cosgamma, gamma, phi, omega, sgcp, omega2_minus_phi,
663                sm, cm, sp, cp, sg, cg;
664
665        cosgamma = A[2][2];
666        if (cosgamma > 1.0) cosgamma = 1.0;
667        else if (cosgamma < -1.0) cosgamma = -1.0;
668
669        gamma = acos(cosgamma);
670
671        sgcp = A[1][1]*A[0][2]-A[0][1]*A[1][2];
672
673        if (gamma == 0.0) omega = -atan2(A[0][1],A[1][1]);
674        else omega = atan2(A[2][1]*A[0][2]+sgcp*A[1][2],sgcp*A[0][2]-A[2][1]*A[1][2]);
675
676        omega2_minus_phi = atan2(A[1][2],A[0][2]);
677
678        phi = omega/2.0 - omega2_minus_phi;
679
680        M[0] = gamma*sin(phi)*180.0/PI;
681        M[1] = gamma*cos(phi)*180.0/PI;
682        M[2] = omega*180.0/PI;
683
684        sm = sin(omega/2.0-phi);
685        cm = cos(omega/2.0-phi);
686        sp = sin(phi);
687        cp = cos(phi);
688        sg = sin(gamma/2.0);
689        cg = cos(gamma/2.0);
690
691        M[3] = (cm*cg*cp-sm*sp)*A[0][3]+(sm*cg*cp+cm*sp)*A[1][3]-sg*cp*A[2][3];
692        M[4] = (-cm*cg*sp-sm*cp)*A[0][3]+(-sm*cg*sp+cm*cp)*A[1][3]+sg*sp*A[2][3];
693        M[5] = (cm*sg)*A[0][3]+(sm*sg)*A[1][3]+cg*A[2][3];
694
695        return M;
696
697    }
698
699    /**
700     * This method returns the complement of a base. (used internally)
701     * @param base The letter of the base
702     * @param RNA Whether it is RNA (if false, it is DNA)
703     * @return The character representing the complement of the base
704     */
705    protected static char complementBase(char base, boolean RNA) {
706        if (base == 'A' && RNA) return 'U';
707        if (base == 'A') return 'T';
708        if (base == 'T' && !RNA) return 'A';
709        if (base == 'U' && RNA) return 'A';
710        if (base == 'C') return 'G';
711        if (base == 'G') return 'C';
712        return ' ';
713    }
714
715    /**
716     * Simple helper method for quickly checking the complement of a sequence, see also DNASequence nad RNASequence classes
717     * for more extensively useful functions not used in this narrow context of structural biology of base pairs.  (Used internally)
718     */
719    private static String complement(String sequence, boolean RNA) {
720        String result = "";
721        for (int i = sequence.length() - 1; i >= 0; i--) {
722            result += complementBase(sequence.charAt(i), RNA);
723        }
724        return result;
725    }
726
727    /**
728     * This does a 3D Vector cross product of two vectors as double arrays. (used internally)
729     *
730     * @param a An array of length 3 or 4 (4th component is ignored)
731     * @param b An array of length 3 or 4 (4th component is ignored)
732     * @return The cross product of the vectors (just the first three components
733     */
734    @SuppressWarnings("unused")
735        private static double[] cross(double[] a, double[] b) {
736        assert a.length >= 3 && b.length >= 3;
737        double[] result = new double[4];
738        result[0] = a[1]*b[2]-a[2]*b[1];
739        result[1] = a[2]*b[0]-a[0]*b[2];
740        result[2] = a[0]*b[1]-a[1]*b[0];
741        return result;
742    }
743
744    /**
745     * This method removes any component of vector a that is along vector b. (used internally)
746     * @param a The array (vector) to remove component from
747     * @param b The component array (vector) to remove from the first
748     * @return The original array a with any component along b removed from it.
749     */
750    private static double[] removeComponent(double[] a, double[] b) {
751        double dot = 0;
752        double[] result = new double[4];
753        for (int i = 0; i < 3; i++) {
754            dot += a[i]*b[i];
755        }
756        for (int i = 0; i < 3; i++) {
757            result[i] = a[i]-dot*b[i];
758        }
759        return result;
760
761    }
762
763    /**
764     * This method finds the longest common substring between two strings. (used internally)
765     * @param s1 The first string
766     * @param s2 The second string
767     * @return The substring itself
768     */
769    private static String longestCommonSubstring(String s1, String s2) {
770        int start = 0;
771        int max = 0;
772        for (int i = 0; i < s1.length(); i++) {
773            for (int j = 0; j < s2.length(); j++) {
774                int x = 0;
775                while (s1.charAt(i + x) == s2.charAt(j + x)) {
776                    x++;
777                    if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break;
778                }
779                if (x > max) {
780                    max = x;
781                    start = i;
782                }
783            }
784        }
785        return s1.substring(start, (start + max));
786    }
787
788    /**
789     * This returns true if a is the complement of b, false otherwise. (used internally)
790     * @param a First letter
791     * @param b Potential matching letter
792     * @param RNA Whether it is RNA (if false, DNA rules are used)
793     * @return True if the bases are complementary.
794     */
795    protected static boolean match(char a, char b, boolean RNA) {
796        if (a == 'A' && b == 'T' && !RNA) return true;
797        if (a == 'A' && b == 'U' && RNA) return true;
798        if (a == 'T' && b == 'A' && !RNA) return true;
799        if (a == 'U' && b == 'A' && RNA) return true;
800        if (a == 'G' && b == 'C') return true;
801        if (a == 'C' && b == 'G') return true;
802        return false;
803    }
804}