001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.util; 022 023import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 024import org.biojava.nbio.core.sequence.DNASequence; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.template.Sequence; 027 028public class SequenceTools { 029 030 protected static final String NUCLEOTIDE_LETTERS = "GCTAUXN"; 031 032 /** 033 * Cyclically permute the characters in {@code string} <em>forward</em> by {@code n} elements. 034 * @param string The string to permute 035 * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable 036 */ 037 public static String permuteCyclic(String string, int n) { 038 // single letters are char[]; full names are Character[] 039 Character[] permuted = new Character[string.length()]; 040 char[] c = string.toCharArray(); 041 Character[] charArray = new Character[c.length]; 042 for (int i = 0; i < c.length; i++) { 043 charArray[i] = c[i]; 044 } 045 permuteCyclic(charArray, permuted, n); 046 char[] p = new char[permuted.length]; 047 for (int i = 0; i < p.length; i++) { 048 p[i] = permuted[i]; 049 } 050 return String.valueOf(p); 051 } 052 053 /** 054 * Cyclically permute {@code array} <em>forward</em> by {@code n} elements. 055 * @param array The original result; will not be changed 056 * @param fill The permuted result will be filled into this array 057 * @param n The number of elements to permute by; can be positive or negative; values greater than the length of the array are acceptable 058 */ 059 public static <T> void permuteCyclic(T[] array, T[] fill, int n) { 060 if (array.length != fill.length) throw new IllegalArgumentException("Lengths do not match"); 061 if (n < 0) n = array.length + n; 062 while (n > array.length) { 063 n -= array.length; 064 } 065 for (int i = 0; i < array.length; i++) { 066 if (i + n < array.length) { 067 fill[i] = array[i + n]; 068 } else { 069 fill[i] = array[i - array.length + n]; 070 } 071 } 072 } 073 074 public static int percentNucleotideSequence(String sequence) 075 { 076 if (sequence == null || sequence.length() == 0) return 0; 077 078 int l = sequence.length(); 079 int n =0; 080 081 for (int i = 0; i < l; i++) 082 { 083 if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0) 084 { 085 continue; 086 } 087 n++; 088 } 089 return (100 * n) / l; 090 } 091 092 public static boolean isNucleotideSequence(String sequence) 093 { 094 if (sequence == null || sequence.length() == 0) return false; 095 096 int l = sequence.length(); 097 for (int i = 0; i < l; i++) 098 { 099 if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0) 100 { 101 return false; 102 } 103 } 104 return true; 105 } 106 107 public Sequence<?> getSequenceFromString(String sequence) throws CompoundNotFoundException { 108 109 110 if( isNucleotideSequence(sequence)) { 111 return new DNASequence(sequence); 112 } else { 113 return new ProteinSequence(sequence); 114 } 115 116 } 117 118 /** A method to check whether an array of sequences contains at least two sequences having an equal length. 119 * 120 * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences 121 * @return true if any two sequences are of an equal length 122 */ 123 public static boolean equalLengthSequences(ProteinSequence[] sequences) { 124 125 for (int i=0; i<sequences.length-1; i++) { 126 if (sequences[i]==null) 127 continue; 128 for (int j=i+1; j<sequences.length; j++) { 129 if (sequences[j]==null) 130 continue; 131 if (sequences[i].getLength() == sequences[j].getLength()) 132 return true; 133 } 134 } 135 return false; 136 } 137}