001package org.biojava.bio.program.formats;
002
003import org.biojava.bio.AnnotationType;
004import org.biojava.bio.program.tagvalue.ParserListener;
005import org.biojava.bio.program.tagvalue.TagValueListener;
006import org.biojava.utils.lsid.LifeScienceIdentifier;
007
008/**
009 * A file format supported by the tag-value event-based parsing system.
010 *
011 * <p>Format should be implemented to provide pre-canned access to common
012 * formats such as Embl, genbank, swissprot, enzyme etc. so that people do not
013 * need to work out which events should be produced by a given file format.
014 * It is expcected that implementations of Format will publish meta-data
015 * about what tags are associated with which values.
016 * </p>
017 *
018 * @author Matthew Pocock
019 * @since 1.3
020 */
021public interface Format {
022  /**
023   * Retrieve a ParserListener pair for the format that will pass all events
024   * on to a listener.
025   *
026   * Call this method to get a working parser that can be fed into a tag-value
027   * parsing pipeline.
028   *
029   * This method may well be called many times during the lifetime of an
030   * applications.. You should make this threadsafe. To avoid buring too much
031   * memory, and to facilitate the comparrison of object by the == operator,
032   * it is usefull to shair as much data as possible between the parsers and
033   * handlers returned by this method.
034   *
035   * @param listener  a TagValueListener that all events should be passed onto
036   * @return a ParserListener for the format
037   */
038  ParserListener getParserListener(TagValueListener listener);
039
040  /**
041   * Get the AnnotationType that constrains the events that will be fired.
042   *
043   * If you feed the events from the ParserListener into somethign that builds
044   * Annotation bundles, this is the AnnotationType that those bundles will
045   * conform to.
046   *
047   * In the cases where the events have been sensibly crafted, it will be
048   * possible to introspect a great deal about the parsing events from this
049   * AnntoationType. Use it to dynamicaly bind events to object models, generate
050   * gui componets, and to work out which formats contain cross-refferenceable
051   * information.
052   *
053   * It is polite to return a full and constrained description of the types of
054   * oevents that may be generated, how many of them could come (cardinality)
055   * and what types of values will be associated with them. The use of
056   * OmtologyTerm instances as property names is encouraged.
057   *
058   * @return an AnnotationType representingchema for the events
059   */
060  AnnotationType getType();
061
062  /**
063   * Retrieve the LSID associated with this format.
064   *
065   * <p>The OBDA recomends taht file formats have identifiers assopciated with
066   * them. This allows the format to be specified unambiguously across
067   * different projects and groups. Idealy, a format LSID should conform to
068   * the odda <a href="http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/registry/lsid_for_dbformats.txt?rev=HEAD&cvsroot=obf-common&content-type=text/vnd.viewcvs-markup">formats specification</a>.</p>
069   */
070  LifeScienceIdentifier getLSID();
071}
072