001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.dp.twohead; 024 025import java.io.Serializable; 026import java.util.Arrays; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030 031import org.biojava.bio.dist.Distribution; 032import org.biojava.bio.dp.EmissionState; 033import org.biojava.bio.dp.MagicalState; 034import org.biojava.bio.dp.ScoreType; 035import org.biojava.bio.dp.State; 036import org.biojava.bio.symbol.Alphabet; 037import org.biojava.bio.symbol.AlphabetManager; 038import org.biojava.bio.symbol.IllegalSymbolException; 039import org.biojava.bio.symbol.Symbol; 040import org.biojava.utils.ListTools; 041 042/** 043 * Cache for columns of emission probabilities in pair-wise alignment 044 * algorithms. 045 * 046 * @author Matthew Pocock 047 * @author David Huen (fixes for magical state) 048 */ 049public class EmissionCache implements Serializable{ 050 private final Map eMap; 051 private final Alphabet alpha; 052 private final State[] states; 053 private final int dsi; 054 private final ScoreType scoreType; 055 private final Symbol[] gap; 056 057 public EmissionCache( 058 Alphabet alpha, 059 State[] states, 060 int dsi, 061 ScoreType scoreType 062 ) { 063 this.eMap = new HashMap(); 064 this.alpha = alpha; 065 this.states = states; 066 this.dsi = dsi; 067 this.scoreType = scoreType; 068 069 List alphas = alpha.getAlphabets(); 070 this.gap = new Symbol[alphas.size()]; 071 for(int i = 0; i < this.gap.length; i++) { 072 this.gap[i] = ((Alphabet) alphas.get(i)).getGapSymbol(); 073 } 074 } 075 076 public final double [] getEmissions(List symList) 077 throws IllegalSymbolException 078 { 079 return getEmissions(symList, true); 080 } 081 082 /** 083 * Retrieve the emission scores from the cache for every EmissionState 084 * for the specified symbols. The scores 085 * will be computed and cached if not already available. 086 * <p> 087 * The correct behaviour should be to exclude emission from the 088 * MagicalState except at the origin and bottom-right of the DP 089 * matrix. As such, the emission vector is only cached when it 090 * it is computed for exorcise set to true. The vector is computed 091 * afresh every time when exorcise is false. it should be noted that 092 * if exorcise is <b>NOT<b> set to false with the emission vector 093 * at the ends of the matrix, the model will fail as the start and 094 * and end transitions become impossible. 095 * 096 * @param symList a list of the symbols in each head that require a lookup for emission probabilities. 097 * @param exorcise Prevents emission from the MagicalState. 098 */ 099 public final double [] getEmissions(List symList, boolean exorcise) 100 throws IllegalSymbolException 101 { 102 double [] emission; 103 if (exorcise) { 104 emission = (double []) eMap.get(symList); 105 if(emission == null) { 106 emission = computeEmissions(symList, true); 107 108 eMap.put(ListTools.createList(symList), emission); 109 } else { 110 //System.out.print("-"); 111 } 112 } 113 else { 114 emission = computeEmissions(symList, false); 115 } 116 return emission; 117 } 118 119 private double [] computeEmissions(List symList, boolean exorcise) 120 throws IllegalSymbolException 121 { 122 // create a Symbol array to permit lookup of 123 // the Symbol to query the emission Distributions with 124 // for different values of advance for an EmissionState. 125 Symbol sym[][] = new Symbol[2][2]; 126// List ll = ListTools.createList(symList); 127 sym[0][0] = AlphabetManager.getGapSymbol(); 128 sym[1][1] = alpha.getSymbol(Arrays.asList(new Symbol [] { 129 (Symbol) symList.get(0), 130 (Symbol) symList.get(1) 131 })); 132 sym[1][0] = alpha.getSymbol(Arrays.asList(new Symbol [] { 133 (Symbol) symList.get(0), 134 gap[1] 135 })); 136 sym[0][1] = alpha.getSymbol(Arrays.asList(new Symbol [] { 137 gap[0], 138 (Symbol) symList.get(1) 139 })); 140 141 double [] emission = new double[dsi]; 142 143 // compute the log(emission probability) 144 for(int i = 0; i < dsi; i++) { 145 if (exorcise && (states[i] instanceof MagicalState)) { 146 // exclude emission from MagicalState 147 emission[i] = Double.NEGATIVE_INFINITY; 148 } 149 else { 150 EmissionState es = (EmissionState) states[i]; 151 int [] advance = es.getAdvance(); 152 Distribution dis = es.getDistribution(); 153 Symbol s = sym[advance[0]][advance[1]]; 154 emission[i] = Math.log(scoreType.calculateScore(dis, s)); 155 } 156 } 157 158 return emission; 159 } 160 161 public void clear() { 162 eMap.clear(); 163 } 164}