001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.util;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.biojava.nbio.core.sequence.template.Sequence;
027
028public class SequenceTools {
029
030        protected static final String NUCLEOTIDE_LETTERS = "GCTAUXN";
031
032        /**
033         * Cyclically permute the characters in {@code string} <em>forward</em> by {@code n} elements.
034         * @param string The string to permute
035         * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable
036         */
037        public static String permuteCyclic(String string, int n) {
038                // single letters are char[]; full names are Character[]
039                Character[] permuted = new Character[string.length()];
040                char[] c = string.toCharArray();
041                Character[] charArray = new Character[c.length];
042                for (int i = 0; i < c.length; i++) {
043                        charArray[i] = c[i];
044                }
045                permuteCyclic(charArray, permuted, n);
046                char[] p = new char[permuted.length];
047                for (int i = 0; i < p.length; i++) {
048                        p[i] = permuted[i];
049                }
050                return String.valueOf(p);
051        }
052
053        /**
054         * Improved implementation that is generally 10-100x faster, and fixes some edge-case bugs.
055         * @param string The string to permute
056         * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable
057         * @return
058         */
059        public static String permuteCyclic2(String string, int n) {
060                String toMutate = string + string;
061                n = n % string.length();
062                if (n < 0) {
063                        n = string.length() + n;
064                }
065                return toMutate.substring(n, n + string.length());
066        }
067
068        /**
069         * Cyclically permute {@code array} <em>forward</em> by {@code n} elements.
070         * @param array The original result; will not be changed
071         * @param fill The permuted result will be filled into this array
072         * @param n The number of elements to permute by; can be positive or negative; values greater than the length of the array are acceptable
073         */
074        public static <T> void permuteCyclic(T[] array, T[] fill, int n) {
075                if (array.length != fill.length) throw new IllegalArgumentException("Lengths do not match");
076                if (n < 0) n = array.length + n;
077                while (n > array.length) {
078                        n -= array.length;
079                }
080                for (int i = 0; i < array.length; i++) {
081                        if (i + n < array.length) {
082                                fill[i] = array[i + n];
083                        } else {
084                                fill[i] = array[i - array.length + n];
085                        }
086                }
087        }
088
089        public static int percentNucleotideSequence(String sequence)
090        {
091                        if (sequence == null || sequence.length() == 0) return 0;
092
093                        int l = sequence.length();
094                        int n =0;
095
096                        for (int i = 0; i < l; i++)
097                        {
098                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
099                                        {
100                                                        continue;
101                                        }
102                                        n++;
103                        }
104                        return (100 * n) / l;
105        }
106
107        public static boolean isNucleotideSequence(String sequence)
108        {
109                        if (sequence == null || sequence.length() == 0) return false;
110
111                        int l = sequence.length();
112                        for (int i = 0; i < l; i++)
113                        {
114                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
115                                        {
116                                                        return false;
117                                        }
118                        }
119                        return true;
120        }
121
122        /**
123         * Attempts to parse String as a DNA sequence first.<br/>
124         * If this fails it tries to  parse as a ProteinSequence.
125         * <br/>
126         * This method does not attempt to create an RNASequence.
127         * <p>
128         * Also, a sequence such as 'ATCGTA' which is both a
129         * peptide sequence and a DNA sequence, will always be returned 
130         * as a DNA sequence.
131         * </p>
132         * <p>
133         * An empty string argument returns a ProteinSequence of length 0.
134         * A null argument throws a {@link NullPointerException}
135         * @param sequence
136         * @return Either a DNASequence or a ProteinSequence
137         * @throws CompoundNotFoundException
138         */
139        public Sequence<?> getSequenceFromString(String sequence) throws CompoundNotFoundException {
140
141
142                if( isNucleotideSequence(sequence)) {
143                        return  new DNASequence(sequence);
144                } else {
145                        return new ProteinSequence(sequence);
146
147                }
148
149        }
150
151        /** A method to check whether an array of sequences contains at least two sequences having an equal length.
152         *
153         * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences
154         * @return true if any two sequences are of an equal length
155         */
156        public static boolean equalLengthSequences(ProteinSequence[] sequences) {
157
158                for (int i=0; i<sequences.length-1; i++) {
159                        if (sequences[i]==null)
160                                continue;
161                        for (int j=i+1; j<sequences.length; j++) {
162                                if (sequences[j]==null)
163                                        continue;
164                                if (sequences[i].getLength() == sequences[j].getLength())
165                                        return true;
166                        }
167                }
168                return false;
169        }
170}