001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.seq.io;
024
025import org.biojava.bio.Annotatable;
026import org.biojava.bio.symbol.Alphabet;
027import org.biojava.bio.symbol.IllegalAlphabetException;
028import org.biojava.bio.symbol.IllegalSymbolException;
029import org.biojava.bio.symbol.Symbol;
030import org.biojava.bio.symbol.SymbolList;
031
032/**
033 * Encapsulate a mapping between BioJava Symbol objects and
034 * some string representation.
035 *
036 * @author Thomas Down
037 * @since 1.2
038 */
039
040public interface SymbolTokenization extends Annotatable {
041  public final static class TokenType {
042    private String type;
043
044    private TokenType(String type) {
045      this.type = type;
046    }
047
048    public String toString()
049    {
050      return "TokenType:" + type;
051    }
052  }
053
054  public final static TokenType CHARACTER = new TokenType("CHARACTER");
055  public final static TokenType FIXEDWIDTH = new TokenType("FIXEDWIDTH");
056  public final static TokenType SEPARATED = new TokenType("SEPARATED");
057  public final static TokenType UNKNOWN = new TokenType("UNKNOWN");
058
059  /**
060   * The alphabet to which this tokenization applies.
061   */
062
063  public Alphabet getAlphabet();
064
065  /**
066   * Determine the style of tokenization represented by this object.
067   */
068
069  public TokenType getTokenType();
070
071
072  /**
073   * Returns the symbol for a single token.
074   * <p>
075   * The Symbol will be a member of the alphabet. If the token is not recognized
076   * as mapping to a symbol, an exception will be thrown.
077   *
078   * @param token the token to retrieve a Symbol for
079   * @return the Symbol for that token
080   * @throws IllegalSymbolException if there is no Symbol for the token
081   */
082
083  public Symbol parseToken(String token)
084          throws IllegalSymbolException;
085
086  /**
087   * Return an object which can parse an arbitrary character stream into
088   * symbols.
089   *
090   * @param listener The listener which gets notified of parsed symbols.
091   */
092
093  public StreamParser parseStream(SeqIOListener listener);
094
095  /**
096   * Return a token representing a single symbol.
097   *
098   * @param sym The symbol
099   * @throws IllegalSymbolException if the symbol isn't recognized.
100   */
101
102  public String tokenizeSymbol(Symbol sym) throws IllegalSymbolException;
103
104  /**
105   * Return a string representation of a list of symbols.
106   *
107   * @param symList A SymbolList
108   * @throws IllegalAlphabetException if alphabets don't match
109   */
110
111  public String tokenizeSymbolList(SymbolList symList) throws IllegalAlphabetException, IllegalSymbolException;
112}