001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.dp; 024 025import org.biojava.bio.Annotation; 026import org.biojava.bio.BioError; 027import org.biojava.bio.dist.DistributionFactory; 028import org.biojava.bio.symbol.Alphabet; 029import org.biojava.bio.symbol.IllegalAlphabetException; 030import org.biojava.bio.symbol.IllegalSymbolException; 031import org.biojava.utils.ChangeVetoException; 032 033/** 034 * @author Matthew Pocock 035 * @author Lachlan Coin 036 */ 037public class ProfileHMM extends SimpleMarkovModel { 038 /** 039 * The advance array. 040 */ 041 private final static int [] advance = { 1 }; 042 043 /** 044 * The number of columns in this model. 045 */ 046 private final int columns; 047 048 /** 049 * Match states array. 050 * <p> 051 * matchStates[0] == matchStates[columns+1] == magicalState(). 052 */ 053 private final EmissionState [] matchStates; 054 055 /** 056 * Insert states array. 057 * <p> 058 * From 0 .. columns(). 059 */ 060 private final EmissionState [] insertStates; 061 062 /** 063 * Delete states array. 064 * <p> 065 * From 0 .. columns()-1 corresponding to indexes 1..columns(). 066 */ 067 private final DotState [] deleteStates; 068 069 /** 070 * Retrieve the number of columns in the model. 071 * 072 * @return the number of columns 073 */ 074 public int columns() { 075 return columns; 076 } 077 078 /** 079 * Retrieve the match state at column indx. 080 * <p> 081 * The first match state is at index 1, and the last match state is at 082 * column columns(). The states at index 0 and columns()+1 are both the 083 * magical state. This is so that the whole model backbone can be addressed 084 * without writing lots of special-case code. 085 * 086 * @param indx the index of the column to retrieve the match state for 087 * @return the match state for column indx 088 * @throws IndexOutOfBoundsException if indx is negative or above columns()+1 089 */ 090 public EmissionState getMatch(int indx) 091 throws IndexOutOfBoundsException { 092 if(indx < 0 || indx > (columns+1) ) { 093 throw new IndexOutOfBoundsException( 094 "Match-state index must be within (0.." + (columns + 1) + "), not " + indx 095 ); 096 } 097 098 return matchStates[indx]; 099 } 100 101 /** 102 * Retrieves the insert state at column indx. 103 * <p> 104 * Insert_0 is the insert that is accessible directly from the magical state. 105 * Insert_1..columns() are 'above' each propper match state. There is no 106 * insert state above the magical state at the end of the model, as insert_columns 107 * already models trailing inserts. 108 * 109 * @param indx the index of the column to retrieve the insert state for 110 * @return the insert state for column indx 111 * @throws IndexOutOfBoundsException if indx is negative or above columns() 112 */ 113 public EmissionState getInsert(int indx) 114 throws IndexOutOfBoundsException { 115 if(indx < 0 || indx > columns ) { 116 throw new IndexOutOfBoundsException( 117 "Insert-state index must be within (0.." + columns + "), not " + indx 118 ); 119 } 120 121 return insertStates[indx]; 122 } 123 124 /** 125 * Retrieves the delete state for column indx. 126 * <p> 127 * Delete states are 'above' the match state that they delete. There is no 128 * delete state for the magical state at either the beginning or end of the 129 * model, so the delete state indx can range within (1..columns()). 130 * 131 * @param indx the index of the column to retrieve the insert state for 132 * @return the insert state for column indx 133 * @throws IndexOutOfBoundsException if indx is negative or above columns() 134 */ 135 public DotState getDelete(int indx) 136 throws IndexOutOfBoundsException { 137 if(indx < 1 || indx > columns ) { 138 throw new IndexOutOfBoundsException( 139 "delete-state index must be within (1.." + columns + "), not " + indx 140 ); 141 } 142 143 return deleteStates[indx-1]; 144 } 145 146 /** 147 * @deprecated 148 */ 149 public ProfileHMM( 150 Alphabet alpha, int columns, 151 DistributionFactory matchFactory, DistributionFactory insertFactory) throws IllegalSymbolException, IllegalTransitionException, IllegalAlphabetException { 152 this(alpha, columns, matchFactory, insertFactory,""); 153 } 154 155 156 157 /** 158 * Create a new ProfileHMM. 159 * <p> 160 * The profile will be over the Alphabet alpha. It will have 'columns' match states, 161 * 'columns' delete states and 'columns'+1 insert states. The match states will be 162 * created by matchFactory, and the insert states will be created by 163 * insertFactory. This gives you great freedom for changing how the states are 164 * implemented. For example, insertFactory may always return views onto a single 165 * underlying probability distribution in which case all insert states will 166 * emit the same type of stuff - or it may create an individual state for 167 * each insert state, in which case each insert can be a different type of thing. 168 * You could make the insert state a view onto your null model, or onto a 169 * protein-specific distribution. 170 * 171 * @param alpha the Alphabet that the profile hmm will emit 172 * @param columns the number of match states 173 * @param matchFactory the StateFactory to use for creating the match states 174 * @param insertFactory the stateFactory to use for creating the insert states 175 */ 176 public ProfileHMM( 177 Alphabet alpha, int columns, 178 DistributionFactory matchFactory, DistributionFactory insertFactory, String name) 179 throws IllegalSymbolException, IllegalTransitionException, 180 IllegalAlphabetException { 181 super(1, alpha,name); 182 183 try { 184 this.columns = columns; 185 this.matchStates = new EmissionState[columns+2]; 186 this.insertStates = new EmissionState[columns+1]; 187 this.deleteStates = new DotState[columns]; 188 189 EmissionState mO = magicalState(); 190 EmissionState iO = new SimpleEmissionState( 191 "i-0", 192 Annotation.EMPTY_ANNOTATION, 193 advance, 194 insertFactory.createDistribution(alpha) 195 ); 196 197 matchStates[0] = mO; 198 insertStates[0] = iO; 199 200 // first column - a leading insert & the magical state 201 addState(iO); 202 203 // 'body' columns 204 for(int i = 1; i <= columns; i++) { 205 EmissionState mN = new SimpleEmissionState( 206 "m-" + i, 207 Annotation.EMPTY_ANNOTATION, 208 advance, 209 matchFactory.createDistribution(alpha) 210 ); 211 EmissionState iN = new SimpleEmissionState( 212 "i-" + i, 213 Annotation.EMPTY_ANNOTATION, 214 advance, 215 insertFactory.createDistribution(alpha) 216 ); 217 DotState dN = new SimpleDotState("d-" + i); 218 219 addState(mN); 220 addState(iN); 221 addState(dN); 222 223 matchStates[i] = mN; 224 insertStates[i] = iN; 225 deleteStates[i-1] = dN; 226 227 228 mO = mN; 229 iO = iN; 230 } 231 matchStates[columns+1] = magicalState(); 232 connectModel(); 233 } catch (ChangeVetoException cve) { 234 throw new BioError( 235 236 "Unable to construct profile HMM", cve 237 ); 238 } 239 } 240 241 /** This is called by constructor in setting up the allowed transitions in the model */ 242 protected void connectModel() throws 243 ChangeVetoException, IllegalSymbolException, IllegalTransitionException,IllegalAlphabetException{ 244 EmissionState mO = getMatch(0); 245 EmissionState iO = getInsert(0); 246 DotState dO = null; 247 createTransition(mO,iO); 248 createTransition(iO,iO); 249 for(int i = 1; i <= columns(); i++){ 250 EmissionState mN = getMatch(i); 251 EmissionState iN = getInsert(i); 252 DotState dN = getDelete(i); 253 254 // from a model state 255 createTransition(mO, mN); 256 createTransition(mN, iN); 257 createTransition(mO, dN); 258 259 // from an insert state 260 createTransition(iN, iN); 261 createTransition(iO, mN); 262 createTransition(iO, dN); 263 264 // from a delete state 265 if(i > 1) { 266 createTransition(dO, dN); 267 createTransition(dO, mN); 268 } 269 createTransition(dN,iN); 270 271 mO = mN; 272 iO = iN; 273 dO = dN; 274 } 275 // for the transitions to end 276 createTransition(mO, magicalState()); 277 createTransition(iO, magicalState()); 278 createTransition(dO, magicalState()); 279 } 280 281}