001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.dp;
024
025import org.biojava.bio.alignment.Alignment;
026
027/**
028 * Extends the Alignment interface so that it is explicitly used to represent
029 * a state path through an HMM, and the associated emitted sequence and
030 * likelihoods.
031 * <p>
032 * A state path should have the following structure:
033 * <bq>
034 * STATES -> list of all states used by the machine
035 * <br>
036 * SCORES -> list of step-wise scores for each state (transition + emission)
037 * <br>
038 * SEQUENCE -> sequence emitted by the machine 
039 * </bq>
040 * The sequence emitted by the machine will be some function of the sequences
041 * that were aligned to the machine, and the state-path taken. Whenever the
042 * state used is a non-emitting state, this emitted sequence is a gap. Whenever
043 * it is an emission state, it is the symbol matched by that state. This is
044 * modeled by the following nesting:
045 * <bq><pre>
046 * SEQUENCE
047 *   -> Gapped view (gap inserted for every position aligned with a dot-state
048 *     -> Sequence emitted by emission states as Alignment
049 *       label_n = input_SymbolList_n
050 *         -> gapped view of SymbolList_n
051 * </pre></bq>
052 * A multi-head HMM (2 or more) emits a single sequence that is
053 * an alignment of the input sequences with gaps added. In this case, the
054 * emitted sequence should be an Alignment object with labels being the input
055 * sequences, and the associated SymbolList objects being gapped views. For the
056 * sake of least-suprise, single-head HMMs should emit an alignment of one
057 * sequence, where the label is the input sequence, and the associated
058 * SymbolList is also the input sequence.
059 *
060 * <p>
061 * I think that this scheme keeps the emitted alignment as close as possible to
062 * a sensible path through the sequence coordinate space, while making this
063 * gappable adapts this to the same co-ordinate system as the HMM state-path
064 * space.
065 * </p>
066 *
067 * @author Matthew Pocock
068 */
069public interface StatePath extends Alignment {
070  /**
071   * Alignment label for the emitted sequence.
072   */
073  public static final Object SEQUENCE = "SEQUENCE";
074
075  /**
076   * Alignment label for the state path.
077   */
078  public static final Object STATES   = "STATES";
079
080  /**
081   * Alignment label for the likelyhood at each step.
082   */
083  public static final Object SCORES   = "SCORES";
084  
085  /**
086   * Return the overall score for this state-path and it's emissions.
087   *
088   * @return the score
089   */
090  public double getScore();
091}