001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.seq.io;
024
025import java.util.ArrayList;
026import java.util.HashSet;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Set;
030
031import org.biojava.bio.BioException;
032import org.biojava.bio.symbol.Alphabet;
033import org.biojava.bio.symbol.BasisSymbol;
034import org.biojava.bio.symbol.FiniteAlphabet;
035import org.biojava.bio.symbol.IllegalSymbolException;
036import org.biojava.bio.symbol.Symbol;
037
038/**
039 * Tokenization for cross-product alphabets.  This class handles
040 * the general case of tokens of the form (foo bar baz), where
041 * each element is handled by a sub-tokenization.  By default,
042 * these will be the "name" tokenizations of each of the sub-alphabets,
043 * but any tokenization can be used.
044 *
045 * @author Thomas Down
046 * @author Greg Cox
047 * @since 1.2
048 */
049
050public class CrossProductTokenization extends WordTokenization {
051    private List subTokenizations;  // List<SymbolTokenization>
052
053    public CrossProductTokenization(Alphabet alpha)
054        throws BioException
055    {
056        super(alpha);
057        subTokenizations = new ArrayList();
058        for (Iterator i = alpha.getAlphabets().iterator(); i.hasNext(); ) {
059            Alphabet subAlpha = (Alphabet) i.next();
060            subTokenizations.add(subAlpha.getTokenization("name"));
061        }
062    }
063
064    public CrossProductTokenization(Alphabet alpha,
065                                    List tokenizers)
066    {
067        super(alpha);
068        this.subTokenizations = tokenizers;
069        // Ought to validate...
070    }
071
072    public Symbol parseToken(String token)
073        throws IllegalSymbolException
074    {
075        char c = token.charAt(0);
076        if (c == '(') {
077            if (token.charAt(token.length() - 1) != ')') {
078                throw new IllegalSymbolException("Mismatched parentheses: " + token);
079            } else {
080                List split = splitString(token.substring(1, token.length() - 1));
081                List syms = new ArrayList();
082
083                Iterator si = split.iterator();
084                Iterator ti = subTokenizations.iterator();
085                while (si.hasNext()) {
086                    String subToken = (String) si.next();
087                    SymbolTokenization subTokenization = (SymbolTokenization) ti.next();
088                    syms.add(subTokenization.parseToken(subToken));
089                }
090
091                return getAlphabet().getSymbol(syms);
092            }
093        } else if (c == '[') {
094            if (token.charAt(token.length() - 1) != ']') {
095                throw new IllegalSymbolException("Mismatched parentheses: " + token);
096            } else {
097                Symbol[] syms = parseString(token.substring(1, token.length() - 1));
098                Set ambigSet = new HashSet();
099                for (int i = 0; i < syms.length; ++i) {
100                    ambigSet.add(syms[i]);
101                }
102                return getAlphabet().getAmbiguity(ambigSet);
103            }
104        } else {
105            throw new IllegalSymbolException("Not in standard cross-product form: " + token);
106        }
107    }
108
109    public String tokenizeSymbol(Symbol s) throws IllegalSymbolException {
110        getAlphabet().validate(s);
111
112        if (s instanceof BasisSymbol) {
113            StringBuffer sb = new StringBuffer();
114            sb.append('(');
115            Iterator si = ((BasisSymbol) s).getSymbols().iterator();
116            Iterator ti = subTokenizations.iterator();
117
118            while (si.hasNext()) {
119                Symbol subSym = (Symbol) si.next();
120                SymbolTokenization subToke = (SymbolTokenization) ti.next();
121
122                sb.append(subToke.tokenizeSymbol(subSym));
123                if (si.hasNext()) {
124                    sb.append(' ');
125                }
126            }
127            sb.append(')');
128            return sb.substring(0);
129        } else {
130            StringBuffer sb = new StringBuffer();
131            sb.append('[');
132            Iterator si = ((FiniteAlphabet) s.getMatches()).iterator();
133            while (si.hasNext()) {
134                Symbol aSym = (Symbol) si.next();
135                sb.append(tokenizeSymbol(aSym));
136                if (si.hasNext()) {
137                    sb.append(' ');
138                }
139            }
140            sb.append(']');
141            return sb.substring(0);
142        }
143    }
144}