BioJava:Performance:ReadDrosophila
Read Drosophila Genome
This is the source code of the example that reads the Drosophila genome and prints out the size of each chromosome:
```java
import java.io.*;
import java.util.*;

import org.biojava.bio.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.db.*;
import org.biojava.bio.seq.io.*;
import org.biojava.bio.symbol.*;

public class ReadFasta {
/**
-
- The program takes two args: the first is the file name of the Fasta file.
-
- The second is the name of the Alphabet. Acceptable names are DNA RNA or PROTEIN.
- /
public static void main(String[] args) {
try {
//setup file input
String filename = args[0];
BufferedInputStream is =
new BufferedInputStream(new FileInputStream(filename));
//get the appropriate Alphabet
Alphabet alpha = AlphabetManager.alphabetForName(args[1]);
//get a SequenceDB of all sequences in the file
SequenceDB db = SeqIOTools.readFasta(is, alpha);
//list sequences and length
SequenceIterator sI = db.sequenceIterator();
long total = 0;
while (sI.hasNext()) {
Sequence seq = sI.nextSequence();
System.out.println(seq.getName() + "\t" + seq.length());
total += seq.length();
}
System.out.println("Total length is " + total);
}
catch (BioException ex) {
//not in fasta format or wrong alphabet
}catch (NoSuchElementException ex) {
//no fasta sequences in the file
ex.printStackTrace();
}catch (FileNotFoundException ex) {
//problem reading file
ex.printStackTrace();
}
}
} ```