001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.seq.io; 024 025import org.biojava.bio.Annotatable; 026import org.biojava.bio.symbol.Alphabet; 027import org.biojava.bio.symbol.IllegalAlphabetException; 028import org.biojava.bio.symbol.IllegalSymbolException; 029import org.biojava.bio.symbol.Symbol; 030import org.biojava.bio.symbol.SymbolList; 031 032/** 033 * Encapsulate a mapping between BioJava Symbol objects and 034 * some string representation. 035 * 036 * @author Thomas Down 037 * @since 1.2 038 */ 039 040public interface SymbolTokenization extends Annotatable { 041 public final static class TokenType { 042 private String type; 043 044 private TokenType(String type) { 045 this.type = type; 046 } 047 048 public String toString() 049 { 050 return "TokenType:" + type; 051 } 052 } 053 054 public final static TokenType CHARACTER = new TokenType("CHARACTER"); 055 public final static TokenType FIXEDWIDTH = new TokenType("FIXEDWIDTH"); 056 public final static TokenType SEPARATED = new TokenType("SEPARATED"); 057 public final static TokenType UNKNOWN = new TokenType("UNKNOWN"); 058 059 /** 060 * The alphabet to which this tokenization applies. 061 */ 062 063 public Alphabet getAlphabet(); 064 065 /** 066 * Determine the style of tokenization represented by this object. 067 */ 068 069 public TokenType getTokenType(); 070 071 072 /** 073 * Returns the symbol for a single token. 074 * <p> 075 * The Symbol will be a member of the alphabet. If the token is not recognized 076 * as mapping to a symbol, an exception will be thrown. 077 * 078 * @param token the token to retrieve a Symbol for 079 * @return the Symbol for that token 080 * @throws IllegalSymbolException if there is no Symbol for the token 081 */ 082 083 public Symbol parseToken(String token) 084 throws IllegalSymbolException; 085 086 /** 087 * Return an object which can parse an arbitrary character stream into 088 * symbols. 089 * 090 * @param listener The listener which gets notified of parsed symbols. 091 */ 092 093 public StreamParser parseStream(SeqIOListener listener); 094 095 /** 096 * Return a token representing a single symbol. 097 * 098 * @param sym The symbol 099 * @throws IllegalSymbolException if the symbol isn't recognized. 100 */ 101 102 public String tokenizeSymbol(Symbol sym) throws IllegalSymbolException; 103 104 /** 105 * Return a string representation of a list of symbols. 106 * 107 * @param symList A SymbolList 108 * @throws IllegalAlphabetException if alphabets don't match 109 */ 110 111 public String tokenizeSymbolList(SymbolList symList) throws IllegalAlphabetException, IllegalSymbolException; 112}