How can I translate all six frames of a nucleotide Sequence?
This is probably one of the more frequent tasks in bioinformatics and
one of the most frequent questions posted to the mailing list.
Six frame translations are good for identifying large ORFs which can be
indicators of coding regions, at least in species that don’t have
introns. A six frame translation is a simple matter of taking
subsequences of the sequence(s) of interest and reverse
complementing/translating as appropriate. The only trick is figuring out
how to take the subsequences so that each one has a length that is evenly
divisible by three.
NOTE: See ‘how to get a
subsequence’ for a
description of how to get a portion of a Sequence for translation.
The following example shows a simple program that will six-frame
translate all sequences in a file and print the results to STDOUT in
FASTA format.
import java.io.BufferedReader; import java.io.File; import
java.io.FileOutputStream; import java.io.FileReader; import
java.io.IOException; import java.io.PrintStream; import
java.util.NoSuchElementException;
import org.biojava.bio.Annotation; import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools; import
org.biojava.bio.seq.RNATools; import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator; import
org.biojava.bio.seq.SequenceTools; import
org.biojava.bio.seq.io.SymbolTokenization; import
org.biojava.bio.symbol.AlphabetManager; import
org.biojava.bio.symbol.IllegalAlphabetException; import
org.biojava.bio.symbol.SymbolList; import
org.biojavax.bio.seq.RichSequence;
/\*\*
`* `
`* Program to six-frame translate a nucleotide sequence usage: java Hex `
`* `<dna|rna>
`* `
`*/`
public class Hex {
` public static void main(String[] args) {`
` `
` String filename = "";`
` String type = "";`
` try {`
` if (args.length != 0) {`
` filename = args[0];`
` type = args[1].toUpperCase();`
` }else{`
` filename =System.getProperty("java.io.tmpdir")+"/MYOZ1.fasta";`
` type="DNA";`
` FileOutputStream f = new FileOutputStream(new File(filename)); `
` PrintStream ps = new PrintStream(f);`
` ps.print(MYOZ1);`
` ps.close();`
` f.close();`
` }`
` SymbolTokenization toke = AlphabetManager.alphabetForName(type)`
` .getTokenization("token");`
` BufferedReader br = new BufferedReader(new FileReader(filename));`
` SequenceIterator seqi = RichSequence.IOTools.readFasta(br,`
` toke, null);`
` `
` // for each sequence`
` while (seqi.hasNext()) {`
` Sequence seq = seqi.nextSequence();`
` // for each frame`
` for (int i = 0; i < 3; i++) {`
` SymbolList prot;`
` Sequence trans;`
` // take the reading frame`
` // remember that in a SymbolList the first element has`
` // index= 1`
` // remember that if the length of the list evenly divisible`
` // by three an IllegalArgumentException will be thrown`
` SymbolList syms = seq.subList(i + 1, seq.length()`
` - (seq.length() - i) % 3);`
` // if it is DNA transcribe it to RNA`
` if (syms.getAlphabet() == DNATools.getDNA()) {`
` syms = DNATools.toRNA(syms);`
` }`
` // output forward translation to STDOUT`
` prot = RNATools.translate(syms);`
` trans = SequenceTools.createSequence(prot, "", seq`
` .getName()`
` + "TranslationFrame: +" + i,`
` Annotation.EMPTY_ANNOTATION);`
` /*`
` * This method is deprecated since BioJava 1.5`
` * SeqIOTools.writeFasta(System.out, trans);`
` */`
` RichSequence.IOTools.writeFasta(System.out, trans, null);`
` // output reverse frame translation to STDOUT`
` syms = RNATools.reverseComplement(syms);`
` prot = RNATools.translate(syms);`
` trans = SequenceTools.createSequence(prot, "", seq`
` .getName()`
` + " TranslationFrame: -" + i,`
` Annotation.EMPTY_ANNOTATION);`
` /*`
` * This method is deprecated since BioJava 1.5`
` * SeqIOTools.writeFasta(System.out, trans);`
` */`
` RichSequence.IOTools.writeFasta(System.out, trans, null);`
` }`
` }`
` br.close();`
` } catch (IOException e) {`
` e.printStackTrace();`
` } catch (IllegalAlphabetException e) {`
` e.printStackTrace();`
` } catch (NoSuchElementException e) {`
` e.printStackTrace();`
` } catch (BioException e) {`
` e.printStackTrace();`
` }`
` }`
` private static String MYOZ1 = ">gi|21359948|ref|NM_021245.2| Homo sapiens myozenin 1 (MYOZ1), mRNA "`
` + "\n"`
` + "GTTTCTCCCTAAGTGCTTCTTTGGATCTCAGGCTCTAGGTGCAATGTGAAGGGGAGTCCCTGGGCAGACTGATCCCTGGC"`
` + "TCAGACAGTTCAGTGGGAGAATCCCAAAGGCCTTTTCCCTCCTTCCTGAGCCTCCGGGCAAGGAGGGAGGGATCTTGGTT"`
` + "CCAGGGTCTCAGTACCCCCTGTGCCATTTGAGCTGCTTGCGCTCATCATCTCTATTAATAACCAACTTCCCTCCCCCACT"`
` + "GCCAGTGCTGCCCCCACGCCTGCCCAGCTCGTGTTCTCCGGTCACAGCAGCTCAGTCCTCCAAAGCTGCTGGACCCCAGG"`
` + "GAGAGCTGACCACTGCCCGAGCAGCCGGCTGAATCCACCTCCACAATGCCGCTCTCAGGAACCCCGGCCCCTAATAAGAA"`
` + "GAGGAAATCCAGCAAGCTGATCATGGAACTCACTGGAGGTGGACAGGAGAGCTCAGGCTTGAACCTGGGCAAAAAGATCA"`
` + "GTGTCCCAAGGGATGTGATGTTGGAGGAACTGTCGCTGCTTACCAACCGGGGCTCCAAGATGTTCAAACTGCGGCAGATG"`
` + "AGGGTGGAGAAGTTTATTTATGAGAACCACCCTGATGTTTTCTCTGACAGCTCAATGGATCACTTCCAGAAGTTCCTTCC"`
` + "AACAGTGGGGGGACAGCTGGGCACAGCTGGTCAGGGATTCTCATACAGCAAGAGCAACGGCAGAGGCGGCAGCCAGGCAG"`
` + "GGGGCAGTGGCTCTGCCGGACAGTATGGCTCTGATCAGCAGCACCATCTGGGCTCTGGGTCTGGAGCTGGGGGTACAGGT"`
` + "GGTCCCGCGGGCCAGGCTGGCAGAGGAGGAGCTGCTGGCACAGCAGGGGTTGGTGAGACAGGATCAGGAGACCAGGCAGG"`
` + "CGGAGAAGGAAAACATATCACTGTGTTCAAGACCTATATTTCCCCATGGGAGCGAGCCATGGGGGTTGACCCCCAGCAAA"`
` + "TGAACCCCTGGTCCTCTACAACCAAAACCTCTCCAACAGGCCTTCTTTCAATCGAACCCCTATTCCCTGGCTGAGCTCTG"`
` + "GGGAGCCTGTAGACTACAACGTGGATATTGGCATCCCCTTGGATGGAGAAACAGAGGAGCTGTGAGGTGTTTCCTCCTCT"`
` + "GATTTGCATCATTTCCCCTCTCTGGCTCCAATTTGGAGAGGGAATGCTGAGCAGATAGCCCCCATTGTTAATCCAGTATC"`
` + "CTTATGGGAATGGAGGGAAAAAGGAGAGATCTACCTTTCCATCCTTTACTCCAAGTCCCCACTCCACGCATCCTTCCTCA"`
` + "CCAACTCAGAGCTCCCCTTCTACTTGCTCCATATGGAACCTGCTCGTTTATGGAATTTGCTCTGCCACCAGTAACAGTCA"`
` + "ATAAACTTCAAGGAAAATGAAAAAAAA";`
} </java>