001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.util;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.biojava.nbio.core.sequence.template.Sequence;
027
028public class SequenceTools {
029
030        protected static final String NUCLEOTIDE_LETTERS = "GCTAUXN";
031
032        /**
033         * Cyclically permute the characters in {@code string} <em>forward</em> by {@code n} elements.
034         * @param string The string to permute
035         * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable
036         */
037        public static String permuteCyclic(String string, int n) {
038                // single letters are char[]; full names are Character[]
039                Character[] permuted = new Character[string.length()];
040                char[] c = string.toCharArray();
041                Character[] charArray = new Character[c.length];
042                for (int i = 0; i < c.length; i++) {
043                        charArray[i] = c[i];
044                }
045                permuteCyclic(charArray, permuted, n);
046                char[] p = new char[permuted.length];
047                for (int i = 0; i < p.length; i++) {
048                        p[i] = permuted[i];
049                }
050                return String.valueOf(p);
051        }
052
053        /**
054         * Cyclically permute {@code array} <em>forward</em> by {@code n} elements.
055         * @param array The original result; will not be changed
056         * @param fill The permuted result will be filled into this array
057         * @param n The number of elements to permute by; can be positive or negative; values greater than the length of the array are acceptable
058         */
059        public static <T> void permuteCyclic(T[] array, T[] fill, int n) {
060                if (array.length != fill.length) throw new IllegalArgumentException("Lengths do not match");
061                if (n < 0) n = array.length + n;
062                while (n > array.length) {
063                        n -= array.length;
064                }
065                for (int i = 0; i < array.length; i++) {
066                        if (i + n < array.length) {
067                                fill[i] = array[i + n];
068                        } else {
069                                fill[i] = array[i - array.length + n];
070                        }
071                }
072        }
073
074        public static int percentNucleotideSequence(String sequence)
075        {
076                        if (sequence == null || sequence.length() == 0) return 0;
077
078                        int l = sequence.length();
079                        int n =0;
080
081                        for (int i = 0; i < l; i++)
082                        {
083                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
084                                        {
085                                                        continue;
086                                        }
087                                        n++;
088                        }
089                        return (100 * n) / l;
090        }
091
092        public static boolean isNucleotideSequence(String sequence)
093        {
094                        if (sequence == null || sequence.length() == 0) return false;
095
096                        int l = sequence.length();
097                        for (int i = 0; i < l; i++)
098                        {
099                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
100                                        {
101                                                        return false;
102                                        }
103                        }
104                        return true;
105        }
106
107        public Sequence<?> getSequenceFromString(String sequence) throws CompoundNotFoundException {
108
109
110                if( isNucleotideSequence(sequence)) {
111                        return  new DNASequence(sequence);
112                } else {
113                        return new ProteinSequence(sequence);
114                }
115
116        }
117
118        /** A method to check whether an array of sequences contains at least two sequences having an equal length.
119         *
120         * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences
121         * @return true if any two sequences are of an equal length
122         */
123        public static boolean equalLengthSequences(ProteinSequence[] sequences) {
124
125                for (int i=0; i<sequences.length-1; i++) {
126                        if (sequences[i]==null)
127                                continue;
128                        for (int j=i+1; j<sequences.length; j++) {
129                                if (sequences[j]==null)
130                                        continue;
131                                if (sequences[i].getLength() == sequences[j].getLength())
132                                        return true;
133                        }
134                }
135                return false;
136        }
137}