001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.ssbind;
023
024import org.biojava.bio.BioError;
025import org.biojava.bio.BioException;
026import org.biojava.bio.seq.DNATools;
027import org.biojava.bio.seq.ProteinTools;
028import org.biojava.bio.symbol.FiniteAlphabet;
029
030/**
031 * <code>AlphabetResolver</code>s are helpers which determine which
032 * type of sequence <code>Alphabet</code> to expect from a search
033 * result. Now public to allow use by anyone making custom handlers.
034 *
035 * @author Keith James
036 * @since 1.2
037 */
038public class AlphabetResolver
039{
040    static final int     DNA = 0;
041    static final int PROTEIN = 1;
042
043    /**
044     * <code>resolveAlphabet</code> returns an appropriate
045     * <code>Alphabet</code> for an arbitrary identifier. The protein
046     * alphabet returned will include the termination character as
047     * e.g. BLASTX 6-frame translations are likely to include stops.
048     *
049     * @param identifier a <code>String</code> identifier (recognised
050     * are BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, DNA and PROTEIN).
051     *
052     * @return a <code>FiniteAlphabet</code>.
053     *
054     * @exception BioException if the identifier is not known.
055     */
056    public static FiniteAlphabet resolveAlphabet(String identifier)
057        throws BioException
058    {
059        int type = 0;
060
061        identifier = identifier.toUpperCase();
062
063        if (identifier.indexOf("TBLASTN") != -1)
064            type = PROTEIN;
065        else if (identifier.indexOf("TBLASTX") != -1)
066            type = PROTEIN;
067        else if (identifier.indexOf("BLASTN") != -1)
068            type = DNA;
069        else if (identifier.indexOf("BLASTP") != -1)
070            type = PROTEIN;
071        else if (identifier.indexOf("BLASTX") != -1)
072            type = PROTEIN;
073        else if (identifier.indexOf("DNA") != -1)
074            type = DNA;
075        else if (identifier.indexOf("PROTEIN") != -1)
076            type = PROTEIN;
077        else
078            throw new BioException("Failed to resolve sequence type from identifier '"
079                                   + identifier
080                                   + "'");
081
082        switch (type)
083        {
084            case DNA:
085                return DNATools.getDNA();
086
087            case PROTEIN:
088                return ProteinTools.getTAlphabet();
089
090            default:
091                throw new BioError("Internal error in AlphabetResolver: failed to resolve to either DNA or protein alphabets");
092        }
093    }
094}