001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.seq.io;
024
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;
import org.biojava.utils.ChangeListener;
import org.biojava.utils.ChangeType;
036
037/**
038 * Simple implementation of SymbolTokenization which uses the `name'
039 * field of the symbols.  This class works with any non-cross-product
040 * FiniteAlphabet, and doesn't need any extra data to be provided.
041 *
042 * @author Thomas Down
043 * @since 1.2 
044 */
045
046public class NameTokenization extends WordTokenization {
047    private transient Map nameToSymbol = null;
048    private boolean caseSensitive;
049
050    public NameTokenization(FiniteAlphabet fab, boolean caseSensitive) {
051        super(fab);
052        fab.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN);
053        this.caseSensitive = caseSensitive;
054    }
055
056    /**
057     * Construct a new NameTokenization, defaulting to case-insensitive.
058     */
059
060    public NameTokenization(FiniteAlphabet fab) {
061        this(fab, false);
062    }
063
064    protected void finalize() throws Throwable {
065        super.finalize();
066        getAlphabet().removeChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN);
067    }
068
069    protected Map getNameToSymbol() {
070        if (nameToSymbol == null) {
071            nameToSymbol = new HashMap();
072            for (Iterator i = ((FiniteAlphabet) getAlphabet()).iterator(); i.hasNext(); ) {
073                Symbol sym = (Symbol) i.next();
074                if (caseSensitive) {
075                    nameToSymbol.put(sym.getName(), sym);
076                } else {
077                    nameToSymbol.put(sym.getName().toLowerCase(), sym);
078                }
079            }
080            nameToSymbol.put("gap", getAlphabet().getGapSymbol());
081        }
082
083        return nameToSymbol;
084    }
085
086    public Symbol parseToken(String token)
087        throws IllegalSymbolException
088    {
089        Symbol sym;
090        if (caseSensitive) {
091            sym = (Symbol) getNameToSymbol().get(token);
092        } else {
093            sym = (Symbol) getNameToSymbol().get(token.toLowerCase());
094        }
095
096        if (sym == null) {
097            char c = token.charAt(0);
098            if (c == '[') {
099                if (token.charAt(token.length() - 1) != ']') {
100                    throw new IllegalSymbolException("Mismatched parentheses: " + token);
101                } else {
102                    Symbol[] syms = parseString(token.substring(1, token.length() - 1));
103                    Set ambigSet = new HashSet();
104                    for (int i = 0; i < syms.length; ++i) {
105                        ambigSet.add(syms[i]);
106                    }
107                    return getAlphabet().getAmbiguity(ambigSet);
108                }
109            } else {
110                throw new IllegalSymbolException("Token `" + token + "' does not appear as a named symbol in alphabet `" + getAlphabet().getName() + "'");
111            }
112        }
113        return sym;
114    }
115
116    public String tokenizeSymbol(Symbol s) throws IllegalSymbolException {
117        getAlphabet().validate(s);
118        return s.getName();
119    }
120}