001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.util; 022 023import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 024import org.biojava.nbio.core.sequence.DNASequence; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.template.Sequence; 027 028public class SequenceTools { 029 030 protected static final String NUCLEOTIDE_LETTERS = "GCTAUXN"; 031 032 /** 033 * Cyclically permute the characters in {@code string} <em>forward</em> by {@code n} elements. 034 * @param string The string to permute 035 * @param n The number of characters to permute by; can be positive or negative; values greater than the length of the array are acceptable 036 */ 037 public static String permuteCyclic(String string, int n) { 038 String toMutate = string + string; 039 n = n % string.length(); 040 if (n < 0) { 041 n = string.length() + n; 042 } 043 return toMutate.substring(n, n + string.length()); 044 } 045 046 /** 047 * Cyclically permute {@code array} <em>forward</em> by {@code n} elements. 048 * @param array The original result; will not be changed 049 * @param fill The permuted result will be filled into this array 050 * @param n The number of elements to permute by; can be positive or negative; values greater than the length of the array are acceptable 051 */ 052 public static <T> void permuteCyclic(T[] array, T[] fill, int n) { 053 if (array.length != fill.length) throw new IllegalArgumentException("Lengths do not match"); 054 if (n < 0) n = array.length + n; 055 while (n > array.length) { 056 n -= array.length; 057 } 058 for (int i = 0; i < array.length; i++) { 059 if (i + n < array.length) { 060 fill[i] = array[i + n]; 061 } else { 062 fill[i] = array[i - array.length + n]; 063 } 064 } 065 } 066 067 public static int percentNucleotideSequence(String sequence) 068 { 069 if (sequence == null || sequence.length() == 0) return 0; 070 071 int l = sequence.length(); 072 int n =0; 073 074 for (int i = 0; i < l; i++) 075 { 076 if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0) 077 { 078 continue; 079 } 080 n++; 081 } 082 return (100 * n) / l; 083 } 084 085 public static boolean isNucleotideSequence(String sequence) 086 { 087 if (sequence == null || sequence.length() == 0) return false; 088 089 int l = sequence.length(); 090 for (int i = 0; i < l; i++) 091 { 092 if (NUCLEOTIDE_LETTERS.indexOf(sequence.charAt(i)) < 0) 093 { 094 return false; 095 } 096 } 097 return true; 098 } 099 100 /** 101 * Attempts to parse String as a DNA sequence first.<br/> 102 * If this fails it tries to parse as a ProteinSequence. 103 * <br/> 104 * This method does not attempt to create an RNASequence. 105 * <p> 106 * Also, a sequence such as 'ATCGTA' which is both a 107 * peptide sequence and a DNA sequence, will always be returned 108 * as a DNA sequence. 109 * </p> 110 * <p> 111 * An empty string argument returns a ProteinSequence of length 0. 112 * A null argument throws a {@link NullPointerException} 113 * @param sequence 114 * @return Either a DNASequence or a ProteinSequence 115 * @throws CompoundNotFoundException 116 */ 117 public Sequence<?> getSequenceFromString(String sequence) throws CompoundNotFoundException { 118 119 120 if( isNucleotideSequence(sequence)) { 121 return new DNASequence(sequence); 122 } else { 123 return new ProteinSequence(sequence); 124 125 } 126 127 } 128 129 /** A method to check whether an array of sequences contains at least two sequences having an equal length. 130 * 131 * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences 132 * @return true if any two sequences are of an equal length 133 */ 134 public static boolean equalLengthSequences(ProteinSequence[] sequences) { 135 136 for (int i=0; i<sequences.length-1; i++) { 137 if (sequences[i]==null) 138 continue; 139 for (int j=i+1; j<sequences.length; j++) { 140 if (sequences[j]==null) 141 continue; 142 if (sequences[i].getLength() == sequences[j].getLength()) 143 return true; 144 } 145 } 146 return false; 147 } 148}