BioJava:Performance:ReadDrosophila

From BioJava

Jump to: navigation, search

Read Drosophila Genome

The following source code is used in the example that reads the Drosophila genome from a FASTA file and prints the name and length of each sequence (chromosome), followed by the total length:

import java.io.*;
import java.util.*;
 
import org.biojava.bio.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.db.*;
import org.biojava.bio.seq.io.*;
import org.biojava.bio.symbol.*;
 
public class ReadFasta {
 
 /**
* * The program takes two args: the first is the file name of the Fasta file.
* * The second is the name of the Alphabet. Acceptable names are DNA RNA or PROTEIN.
**/
 public static void main(String[] args) {
 
   try {
     //setup file input
     String filename = args[0];
     BufferedInputStream is =
       new BufferedInputStream(new FileInputStream(filename));
 
 
     //get the appropriate Alphabet
     Alphabet alpha = AlphabetManager.alphabetForName(args[1]);
 
     //get a SequenceDB of all sequences in the file
     SequenceDB db = SeqIOTools.readFasta(is, alpha);
 
     //list sequences and length
     SequenceIterator sI = db.sequenceIterator();
     long total = 0;
     while (sI.hasNext()) {
       Sequence seq = sI.nextSequence();
       System.out.println(seq.getName() + "\t" + seq.length());
       total += seq.length();
     }
     System.out.println("Total length is " + total);
   }
   catch (BioException ex) {
     //not in fasta format or wrong alphabet
   }catch (NoSuchElementException ex) {
     //no fasta sequences in the file
     ex.printStackTrace();
   }catch (FileNotFoundException ex) {
     //problem reading file
     ex.printStackTrace();
   }
 }
}
Personal tools