001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.dp.twohead;
024
025import java.io.Serializable;
026import java.util.Arrays;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030
031import org.biojava.bio.dist.Distribution;
032import org.biojava.bio.dp.EmissionState;
033import org.biojava.bio.dp.MagicalState;
034import org.biojava.bio.dp.ScoreType;
035import org.biojava.bio.dp.State;
036import org.biojava.bio.symbol.Alphabet;
037import org.biojava.bio.symbol.AlphabetManager;
038import org.biojava.bio.symbol.IllegalSymbolException;
039import org.biojava.bio.symbol.Symbol;
040import org.biojava.utils.ListTools;
041
042/**
043 * Cache for columns of emission probabilities in pair-wise alignment
044 * algorithms. 
045 *
046 * @author Matthew Pocock
047 * @author David Huen (fixes for magical state)
048 */
049public class EmissionCache implements Serializable{
050  private final Map eMap;
051  private final Alphabet alpha;
052  private final State[] states;
053  private final int dsi;
054  private final ScoreType scoreType;
055  private final Symbol[] gap;
056  
057  public EmissionCache(
058    Alphabet alpha,
059    State[] states,
060    int dsi,
061    ScoreType scoreType
062  ) {
063    this.eMap = new HashMap();
064    this.alpha = alpha;
065    this.states = states;
066    this.dsi = dsi;
067    this.scoreType = scoreType;
068    
069    List alphas = alpha.getAlphabets();
070    this.gap = new Symbol[alphas.size()];
071    for(int i = 0; i < this.gap.length; i++) {
072      this.gap[i] = ((Alphabet) alphas.get(i)).getGapSymbol();
073    }
074  }
075
076    public final double [] getEmissions(List symList)
077        throws IllegalSymbolException
078    {
079        return getEmissions(symList, true);
080    }
081 
082    /**
083     * Retrieve the emission scores from the cache for every EmissionState
084     * for the specified symbols.  The scores
085     *  will be computed and cached if not already available.
086     * <p>
087     * The correct behaviour should be to exclude emission from the
088     * MagicalState except at the origin and bottom-right of the DP
089     * matrix.  As such, the emission vector is only cached when it
090     * it is computed for exorcise set to true. The vector is computed
091     * afresh every time when exorcise is false.  it should be noted that
092     * if exorcise is <b>NOT<b> set to false with the emission vector
093     * at the ends of the matrix, the model will fail as the start and
094     * and end transitions become impossible.
095     *
096     * @param symList a list of the symbols in each head that require a lookup for emission probabilities.
097     * @param exorcise Prevents emission from the MagicalState.
098     */
099    public final double [] getEmissions(List symList, boolean exorcise)
100        throws IllegalSymbolException 
101    {
102        double [] emission;
103        if (exorcise) {
104            emission = (double []) eMap.get(symList);
105            if(emission == null) {
106                emission = computeEmissions(symList, true);
107
108                eMap.put(ListTools.createList(symList), emission);
109            } else {
110                //System.out.print("-");
111            }
112        }
113        else {
114            emission = computeEmissions(symList, false);
115        }
116        return emission;    
117    }
118
119    private double [] computeEmissions(List symList, boolean exorcise)
120        throws IllegalSymbolException
121    {
122        // create a Symbol array to permit lookup of
123        // the Symbol to query the emission Distributions with
124        // for different values of advance for an EmissionState.
125        Symbol sym[][] = new Symbol[2][2];
126//        List ll = ListTools.createList(symList);
127        sym[0][0] = AlphabetManager.getGapSymbol();
128        sym[1][1] = alpha.getSymbol(Arrays.asList(new Symbol [] {
129            (Symbol) symList.get(0),
130            (Symbol) symList.get(1)
131            }));
132        sym[1][0] = alpha.getSymbol(Arrays.asList(new Symbol [] {
133            (Symbol) symList.get(0),
134            gap[1]
135            }));
136        sym[0][1] = alpha.getSymbol(Arrays.asList(new Symbol [] {
137            gap[0],
138            (Symbol) symList.get(1)
139            }));
140
141        double [] emission = new double[dsi];
142
143        // compute the log(emission probability)
144        for(int i = 0; i < dsi; i++) {
145            if (exorcise && (states[i] instanceof MagicalState)) {
146                // exclude emission from MagicalState
147                emission[i] = Double.NEGATIVE_INFINITY;
148            }
149            else {
150                EmissionState es = (EmissionState) states[i];
151                int [] advance = es.getAdvance();
152                Distribution dis = es.getDistribution();
153                Symbol s = sym[advance[0]][advance[1]];
154                emission[i] = Math.log(scoreType.calculateScore(dis, s));
155            }
156        }
157
158        return emission;
159    }
160
161  public void clear() {
162    eMap.clear();
163  }
164}