001package org.biojava.bio.program.formats; 002 003import org.biojava.bio.AnnotationType; 004import org.biojava.bio.program.tagvalue.ParserListener; 005import org.biojava.bio.program.tagvalue.TagValueListener; 006import org.biojava.utils.lsid.LifeScienceIdentifier; 007 008/** 009 * A file format supported by the tag-value event-based parsing system. 010 * 011 * <p>Format should be implemented to provide pre-canned access to common 012 * formats such as Embl, genbank, swissprot, enzyme etc. so that people do not 013 * need to work out which events should be produced by a given file format. 014 * It is expcected that implementations of Format will publish meta-data 015 * about what tags are associated with which values. 016 * </p> 017 * 018 * @author Matthew Pocock 019 * @since 1.3 020 */ 021public interface Format { 022 /** 023 * Retrieve a ParserListener pair for the format that will pass all events 024 * on to a listener. 025 * 026 * Call this method to get a working parser that can be fed into a tag-value 027 * parsing pipeline. 028 * 029 * This method may well be called many times during the lifetime of an 030 * applications.. You should make this threadsafe. To avoid buring too much 031 * memory, and to facilitate the comparrison of object by the == operator, 032 * it is usefull to shair as much data as possible between the parsers and 033 * handlers returned by this method. 034 * 035 * @param listener a TagValueListener that all events should be passed onto 036 * @return a ParserListener for the format 037 */ 038 ParserListener getParserListener(TagValueListener listener); 039 040 /** 041 * Get the AnnotationType that constrains the events that will be fired. 042 * 043 * If you feed the events from the ParserListener into somethign that builds 044 * Annotation bundles, this is the AnnotationType that those bundles will 045 * conform to. 046 * 047 * In the cases where the events have been sensibly crafted, it will be 048 * possible to introspect a great deal about the parsing events from this 049 * AnntoationType. Use it to dynamicaly bind events to object models, generate 050 * gui componets, and to work out which formats contain cross-refferenceable 051 * information. 052 * 053 * It is polite to return a full and constrained description of the types of 054 * oevents that may be generated, how many of them could come (cardinality) 055 * and what types of values will be associated with them. The use of 056 * OmtologyTerm instances as property names is encouraged. 057 * 058 * @return an AnnotationType representingchema for the events 059 */ 060 AnnotationType getType(); 061 062 /** 063 * Retrieve the LSID associated with this format. 064 * 065 * <p>The OBDA recomends taht file formats have identifiers assopciated with 066 * them. This allows the format to be specified unambiguously across 067 * different projects and groups. Idealy, a format LSID should conform to 068 * the odda <a href="http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/registry/lsid_for_dbformats.txt?rev=HEAD&cvsroot=obf-common&content-type=text/vnd.viewcvs-markup">formats specification</a>.</p> 069 */ 070 LifeScienceIdentifier getLSID(); 071} 072