001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.util;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.biojava.nbio.core.sequence.template.Sequence;
027
028public class SequenceTools {
029
030        protected static final String NUCLEOTIDE_LETTERS = "GCTAUXN";
031
032        /**
033         * Cyclically permute the characters in {@code string} <em>forward</em> by {@code n} elements.
034         * @param string The string to permute
035         * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable
036         */
037        public static String permuteCyclic(String string, int n) {
038                String toMutate = string + string;
039                n = n % string.length();
040                if (n < 0) {
041                        n = string.length() + n;
042                }
043                return toMutate.substring(n, n + string.length());
044        }
045
046        /**
047         * Cyclically permute {@code array} <em>forward</em> by {@code n} elements.
048         * @param array The original result; will not be changed
049         * @param fill The permuted result will be filled into this array
050         * @param n The number of elements to permute by; can be positive or negative; values greater than the length of the array are acceptable
051         */
052        public static <T> void permuteCyclic(T[] array, T[] fill, int n) {
053                if (array.length != fill.length) throw new IllegalArgumentException("Lengths do not match");
054                if (n < 0) n = array.length + n;
055                while (n > array.length) {
056                        n -= array.length;
057                }
058                for (int i = 0; i < array.length; i++) {
059                        if (i + n < array.length) {
060                                fill[i] = array[i + n];
061                        } else {
062                                fill[i] = array[i - array.length + n];
063                        }
064                }
065        }
066
067        public static int percentNucleotideSequence(String sequence)
068        {
069                        if (sequence == null || sequence.length() == 0) return 0;
070
071                        int l = sequence.length();
072                        int n =0;
073
074                        for (int i = 0; i < l; i++)
075                        {
076                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
077                                        {
078                                                        continue;
079                                        }
080                                        n++;
081                        }
082                        return (100 * n) / l;
083        }
084
085        public static boolean isNucleotideSequence(String sequence)
086        {
087                        if (sequence == null || sequence.length() == 0) return false;
088
089                        int l = sequence.length();
090                        for (int i = 0; i < l; i++)
091                        {
092                                        if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0)
093                                        {
094                                                        return false;
095                                        }
096                        }
097                        return true;
098        }
099
100        /**
101         * Attempts to parse String as a DNA sequence first.<br/>
102         * If this fails it tries to  parse as a ProteinSequence.
103         * <br/>
104         * This method does not attempt to create an RNASequence.
105         * <p>
106         * Also, a sequence such as 'ATCGTA' which is both a
107         * peptide sequence and a DNA sequence, will always be returned 
108         * as a DNA sequence.
109         * </p>
110         * <p>
111         * An empty string argument returns a ProteinSequence of length 0.
112         * A null argument throws a {@link NullPointerException}
113         * @param sequence
114         * @return Either a DNASequence or a ProteinSequence
115         * @throws CompoundNotFoundException
116         */
117        public Sequence<?> getSequenceFromString(String sequence) throws CompoundNotFoundException {
118
119
120                if( isNucleotideSequence(sequence)) {
121                        return  new DNASequence(sequence);
122                } else {
123                        return new ProteinSequence(sequence);
124
125                }
126
127        }
128
129        /** A method to check whether an array of sequences contains at least two sequences having an equal length.
130         *
131         * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences
132         * @return true if any two sequences are of an equal length
133         */
134        public static boolean equalLengthSequences(ProteinSequence[] sequences) {
135
136                for (int i=0; i<sequences.length-1; i++) {
137                        if (sequences[i]==null)
138                                continue;
139                        for (int j=i+1; j<sequences.length; j++) {
140                                if (sequences[j]==null)
141                                        continue;
142                                if (sequences[i].getLength() == sequences[j].getLength())
143                                        return true;
144                        }
145                }
146                return false;
147        }
148}