BioJava:Cookbook:Annotations:List2

When you read in an annotated sequence file, such as one from GenBank or EMBL, there is a lot more detailed information than just the raw sequence. If the information has a sensible location then it ends up as a Feature. Each of these features can be annotated with specific information.

The following program reads in a GenBank or EMBL file and outputs annotation information about each of the CDS features.

```java
/**
 * Class to load an EMBL or Genbank sequence file and output annotation information about the CDS features.
 */

//Java libraries
import java.io.*;
import java.util.*;
//BioJava libraries
import org.biojava.bio.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.io.*;
//BioJava extension libraries
import org.biojavax.*;
import org.biojavax.ontology.*;
import org.biojavax.bio.*;
import org.biojavax.bio.seq.*;

public class ExtractInformation {

 //Create the RichSequence object
 RichSequence richSeq;

 //ExtractInformation constructor
 public ExtractInformation(String fileName){
   //Load the sequence file
   try {
     richSeq = RichSequence.IOTools.readGenbankDNA(new BufferedReader(new FileReader(fileName)),null).nextRichSequence();
   }
   catch(FileNotFoundException fnfe){
     System.out.println("FileNotFoundException: " + fnfe);
   }
   catch(BioException bioe1){
     System.err.println("Not a Genbank sequence trying EMBL");
     try  {
       richSeq = RichSequence.IOTools.readEMBLDNA(new BufferedReader(new FileReader(fileName)),null).nextRichSequence();
     }
     catch(BioException bioe2){
       System.err.println("Not an EMBL sequence either");
       System.exit(1);
     }
     catch(FileNotFoundException fnfe){
       System.out.println("FileNotFoundException: " + fnfe);
     }
   }
   //Filter the sequence on CDS features
   FeatureFilter ff = new FeatureFilter.ByType("CDS");
   FeatureHolder fh = richSeq.filter(ff);

   //Iterate through the CDS features
   for (Iterator ` i = fh.features(); i.hasNext();){` `     RichFeature rf = (RichFeature)i.next();` `     ` `     //Get the strand orientation of the feature` `     char featureStrand = rf.getStrand().getToken();`

     //Get the location of the feature
     String featureLocation = rf.getLocation().toString();
     
     //Get the annotation of the feature
     RichAnnotation ra = (RichAnnotation)rf.getAnnotation();

     //Use BioJava defined ComparableTerms 
     ComparableTerm geneTerm = new RichSequence.Terms().getGeneNameTerm();
     ComparableTerm synonymTerm = new RichSequence.Terms().getGeneSynonymTerm();
     //Create the required additional ComparableTerms
     ComparableTerm locusTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm("locus_tag");
     ComparableTerm productTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm("product");
     ComparableTerm proteinIDTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm("protein_id");
     
     //Create empty strings
     String gene = "";
     String locus = "";
     String product = "";
     String geneSynonym = "";
     String proteinID = "";

     //Iterate through the notes in the annotation 
     for (Iterator 

it = ra.getNoteSet().iterator(); it.hasNext();){ `       Note note = it.next();` `     ` `     //Check each note to see if it matches one of the required ComparableTerms` `       if(note.getTerm().equals(locusTerm)){` `         locus = note.getValue().toString();` `       }` `       if(note.getTerm().equals(productTerm)){` `         product = note.getValue().toString();` `       }` `       if(note.getTerm().equals(geneTerm)){` `         gene = note.getValue().toString();` `       }` `       if(note.getTerm().equals(synonymTerm)){` `         geneSynonym = note.getValue().toString();` `       }` `       if(note.getTerm().equals(proteinIDTerm)){` `         proteinID = note.getValue().toString();` `       }` `     }` `     //Outout the feature information` `     System.out.println(locus + "  " + gene + "  " + geneSynonym + "  " + proteinID + "  " + product + "  " + featureStrand + "  " + featureLocation);` `   }` ` }` ` ` ` //Main method` ` public static void main(String args []){` `   if (args.length != 1){` `     System.out.println("Usage: java ExtractInformation ``");` `     System.exit(1);` `   }` `   else {` `     new ExtractInformation(args[0]);` `   }` ` }` } ```