001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.dp;
024
025import org.biojava.bio.Annotation;
026import org.biojava.bio.BioError;
027import org.biojava.bio.dist.DistributionFactory;
028import org.biojava.bio.symbol.Alphabet;
029import org.biojava.bio.symbol.IllegalAlphabetException;
030import org.biojava.bio.symbol.IllegalSymbolException;
031import org.biojava.utils.ChangeVetoException;
032
033/**
034 * @author Matthew Pocock
035 * @author Lachlan Coin
036 */
037public class ProfileHMM extends SimpleMarkovModel {
038  /**
039   * The advance array.
040   */
041  private final static int [] advance = { 1 };
042
043  /**
044   * The number of columns in this model.
045   */
046  private final int columns;
047
048  /**
049   * Match states array.
050   * <p>
051   * matchStates[0] == matchStates[columns+1] == magicalState().
052   */
053  private final EmissionState [] matchStates;
054
055  /**
056   * Insert states array.
057   * <p>
058   * From 0 .. columns().
059   */
060  private final EmissionState [] insertStates;
061
062  /**
063   * Delete states array.
064   * <p>
065   * From 0 .. columns()-1 corresponding to indexes 1..columns().
066   */
067  private final DotState [] deleteStates;
068
069  /**
070   * Retrieve the number of columns in the model.
071   *
072   * @return the number of columns
073   */
074  public int columns() {
075    return columns;
076  }
077
078  /**
079   * Retrieve the match state at column indx.
080   * <p>
081   * The first match state is at index 1, and the last match state is at
082   * column columns(). The states at index 0 and columns()+1 are both the
083   * magical state. This is so that the whole model backbone can be addressed
084   * without writing lots of special-case code.
085   *
086   * @param indx  the index of the column to retrieve the match state for
087   * @return the match state for column indx
088   * @throws IndexOutOfBoundsException if indx is negative or above columns()+1
089   */
090  public EmissionState getMatch(int indx)
091  throws IndexOutOfBoundsException {
092    if(indx < 0 || indx > (columns+1) ) {
093      throw new IndexOutOfBoundsException(
094        "Match-state index must be within (0.." + (columns + 1) + "), not " + indx
095      );
096    }
097
098    return matchStates[indx];
099  }
100
101  /**
102   * Retrieves the insert state at column indx.
103   * <p>
104   * Insert_0 is the insert that is accessible directly from the magical state.
105   * Insert_1..columns() are 'above' each propper match state. There is no
106   * insert state above the magical state at the end of the model, as insert_columns
107   * already models trailing inserts.
108   *
109   * @param indx  the index of the column to retrieve the insert state for
110   * @return the insert state for column indx
111   * @throws IndexOutOfBoundsException if indx is negative or above columns()
112   */
113  public EmissionState getInsert(int indx)
114  throws IndexOutOfBoundsException {
115    if(indx < 0 || indx > columns ) {
116      throw new IndexOutOfBoundsException(
117        "Insert-state index must be within (0.." + columns + "), not " + indx
118      );
119    }
120
121    return insertStates[indx];
122  }
123
124  /**
125   * Retrieves the delete state for column indx.
126   * <p>
127   * Delete states are 'above' the match state that they delete. There is no
128   * delete state for the magical state at either the beginning or end of the
129   * model, so the delete state indx can range within (1..columns()).
130   *
131   * @param indx  the index of the column to retrieve the insert state for
132   * @return the insert state for column indx
133   * @throws IndexOutOfBoundsException if indx is negative or above columns()
134   */
135  public DotState getDelete(int indx)
136  throws IndexOutOfBoundsException {
137    if(indx < 1 || indx > columns ) {
138      throw new IndexOutOfBoundsException(
139        "delete-state index must be within (1.." + columns + "), not " + indx
140      );
141    }
142
143    return deleteStates[indx-1];
144  }
145
146    /**
147     * @deprecated
148     */
149  public ProfileHMM(
150    Alphabet alpha, int columns,
151    DistributionFactory matchFactory, DistributionFactory insertFactory) throws IllegalSymbolException, IllegalTransitionException,  IllegalAlphabetException {
152    this(alpha, columns, matchFactory, insertFactory,"");
153  }
154
155
156
157  /**
158   * Create a new ProfileHMM.
159   * <p>
160   * The profile will be over the Alphabet alpha. It will have 'columns' match states,
161   * 'columns' delete states and 'columns'+1 insert states. The match states will be
162   * created by matchFactory, and the insert states will be created by
163   * insertFactory. This gives you great freedom for changing how the states are
164   * implemented. For example, insertFactory may always return views onto a single
165   * underlying probability distribution in which case all insert states will
166   * emit the same type of stuff - or it may create an individual state for
167   * each insert state, in which case each insert can be a different type of thing.
168   * You could make the insert state a view onto your null model, or onto a
169   * protein-specific distribution.
170   *
171   * @param alpha the Alphabet that the profile hmm will emit
172   * @param columns the number of match states
173   * @param matchFactory the StateFactory to use for creating the match states
174   * @param insertFactory the stateFactory to use for creating the insert states
175   */
176  public ProfileHMM(
177    Alphabet alpha, int columns,
178    DistributionFactory matchFactory, DistributionFactory insertFactory, String name)
179      throws IllegalSymbolException, IllegalTransitionException,
180  IllegalAlphabetException {
181    super(1, alpha,name);
182
183    try {
184      this.columns = columns;
185      this.matchStates = new EmissionState[columns+2];
186      this.insertStates = new EmissionState[columns+1];
187      this.deleteStates = new DotState[columns];
188
189      EmissionState mO = magicalState();
190      EmissionState iO = new SimpleEmissionState(
191        "i-0",
192        Annotation.EMPTY_ANNOTATION,
193        advance,
194        insertFactory.createDistribution(alpha)
195      );
196
197      matchStates[0] = mO;
198      insertStates[0] = iO;
199
200      // first column - a leading insert & the magical state
201      addState(iO);
202
203      // 'body' columns
204      for(int i = 1; i <= columns; i++) {
205        EmissionState mN = new SimpleEmissionState(
206          "m-" + i,
207          Annotation.EMPTY_ANNOTATION,
208          advance,
209          matchFactory.createDistribution(alpha)
210        );
211        EmissionState iN = new SimpleEmissionState(
212          "i-" + i,
213          Annotation.EMPTY_ANNOTATION,
214          advance,
215          insertFactory.createDistribution(alpha)
216        );
217        DotState dN = new SimpleDotState("d-" + i);
218
219        addState(mN);
220        addState(iN);
221        addState(dN);
222
223        matchStates[i] = mN;
224        insertStates[i] = iN;
225        deleteStates[i-1] = dN;
226
227
228        mO = mN;
229        iO = iN;
230      }
231      matchStates[columns+1] = magicalState();
232      connectModel();
233    } catch (ChangeVetoException cve) {
234      throw new BioError(
235
236        "Unable to construct profile HMM", cve
237      );
238    }
239  }
240
241    /** This is called by constructor in setting up the allowed transitions in the model */
242    protected void connectModel() throws
243        ChangeVetoException, IllegalSymbolException, IllegalTransitionException,IllegalAlphabetException{
244        EmissionState mO = getMatch(0);
245        EmissionState iO = getInsert(0);
246        DotState dO = null;
247        createTransition(mO,iO);
248        createTransition(iO,iO);
249        for(int i = 1; i <= columns(); i++){
250            EmissionState mN = getMatch(i);
251            EmissionState iN = getInsert(i);
252            DotState dN = getDelete(i);
253
254            // from a model state
255            createTransition(mO, mN);
256            createTransition(mN, iN);
257            createTransition(mO, dN);
258
259            // from an insert state
260            createTransition(iN, iN);
261            createTransition(iO, mN);
262            createTransition(iO, dN);
263
264            // from a delete state
265            if(i > 1) {
266                createTransition(dO, dN);
267                createTransition(dO, mN);
268            }
269            createTransition(dN,iN);
270
271            mO = mN;
272            iO = iN;
273            dO = dN;
274        }
275        // for the transitions to end
276        createTransition(mO, magicalState());
277        createTransition(iO, magicalState());
278        createTransition(dO, magicalState());
279    }
280
281}