001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.dist; 024 025import java.io.Serializable; 026import java.lang.ref.SoftReference; 027import java.util.Arrays; 028import java.util.HashMap; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032 033import org.biojava.bio.BioError; 034import org.biojava.bio.symbol.Alphabet; 035import org.biojava.bio.symbol.AlphabetManager; 036import org.biojava.bio.symbol.AtomicSymbol; 037import org.biojava.bio.symbol.BasisSymbol; 038import org.biojava.bio.symbol.FiniteAlphabet; 039import org.biojava.bio.symbol.IllegalAlphabetException; 040import org.biojava.bio.symbol.IllegalSymbolException; 041import org.biojava.bio.symbol.Symbol; 042import org.biojava.utils.AbstractChangeable; 043import org.biojava.utils.ChangeVetoException; 044import org.biojava.utils.ListTools; 045 046/** 047 * Class for pairing up two independant distributions. 048 * 049 * @author Matthew Pocock 050 * @author Thomas Down 051 * @author Samiul Hasan 052 * @since 1.1 053 */ 054 055 056public class PairDistribution 057extends AbstractChangeable 058implements Serializable, Distribution { 059 private static Map cache; 060 061 static { 062 cache = new HashMap(); 063 } 064 065 /** 066 * Get a uniform null model over a PairDistribution over [first,second]. 067 * 068 * @param first the first Alphabet 069 * @param second the second Alphabet 070 * @return a Distribution that is a uniform distribution over the product 071 * of first and second 072 */ 073 protected static Distribution getNullModel(Distribution first, Distribution second) { 074 synchronized(cache) { 075 first = first.getNullModel(); 076 second = second.getNullModel(); 077 List distL = new ListTools.Doublet(first, second); 078 SoftReference ref = (SoftReference) cache.get(distL); 079 Distribution dist; 080 if(ref == null) { 081 dist = new PairDistribution(first, second); 082 cache.put(distL, new SoftReference(dist)); 083 } else { 084 dist = (Distribution) ref.get(); 085 if(dist == null) { 086 dist = new PairDistribution(first, second); 087 cache.put(distL, new SoftReference(dist)); 088 } 089 } 090 return dist; 091 } 092 } 093 094 private Distribution first; 095 private Distribution second; 096 private Alphabet alphabet; 097 098 public Alphabet getAlphabet() { 099 return alphabet; 100 } 101 102 public Distribution getNullModel() { 103 return getNullModel(first, second); 104 } 105 106 public void setNullModel(Distribution nullModel) 107 throws IllegalAlphabetException, ChangeVetoException { 108 throw new ChangeVetoException( 109 "PairDistribution objects can't have their null models changed." 110 ); 111 } 112 113 /** 114 * Register this paired distribution with a model trainer. 115 * @param trainer the trainer to register this distribution with. 116 */ 117 public void registerWithTrainer(org.biojava.bio.dp.ModelTrainer trainer) { 118 trainer.registerDistribution(first); 119 trainer.registerDistribution(second); 120 121 trainer.registerTrainer(this, new PairTrainer()); 122 } 123 124 public double getWeight(Symbol sym) 125 throws IllegalSymbolException { 126 if(sym instanceof BasisSymbol) { 127 List symL = ((BasisSymbol) sym).getSymbols(); 128 Symbol f = (Symbol) symL.get(0); 129 Symbol s = (Symbol) symL.get(1); 130 131 return first.getWeight(f) * second.getWeight(s); 132 } else { 133 double score = 0.0; 134 for(Iterator i = ((FiniteAlphabet) sym.getMatches()).iterator(); 135 i.hasNext(); ) { 136 AtomicSymbol s = (AtomicSymbol) i.next(); 137 score += getWeight(s); 138 } 139 return score; 140 } 141 } 142 143 public void setWeight(Symbol sym, double weight) 144 throws ChangeVetoException { 145 throw new ChangeVetoException( 146 "Can't set the weight directly in a PairDistribution. " + 147 "You must set the weights in the underlying distributions." 148 ); 149 } 150 151 /** 152 * Create a new PairDistribution that represents the product of two other 153 * distributions. The alphabet will be the product of the first and seccond 154 * distribution's alphabets, and the weights will be the products of the 155 * weights for the first and seccond distributions given the first and second 156 * component of the symbol respectively. 157 * 158 * @param first the first distribution 159 * @param second the second distribution 160 */ 161 public PairDistribution(Distribution first, Distribution second) { 162 this.first = first; 163 this.second = second; 164 this.alphabet = AlphabetManager.getCrossProductAlphabet( 165 Arrays.asList(new Alphabet[] { 166 first.getAlphabet(), second.getAlphabet() 167 }) 168 ); 169 } 170 171 public void registerWithTrainer(DistributionTrainerContext dtc) { 172 dtc.registerTrainer(this, new PairTrainer()); 173 } 174 175 private class PairTrainer 176 extends IgnoreCountsTrainer 177 implements Serializable { 178 public double getCount(DistributionTrainerContext dtc, AtomicSymbol as) 179 throws IllegalSymbolException { 180 getAlphabet().validate(as); 181 182 List symL = as.getSymbols(); 183 Symbol f = (Symbol) symL.get(0); 184 Symbol s = (Symbol) symL.get(1); 185 186 // I don't think this is correct. Pants! 187 return 188 (dtc.getCount(first, f) + dtc.getCount(second, s)) * 0.5; 189 190 } 191 192 public void addCount( 193 DistributionTrainerContext dtc, Symbol sym, double times 194 ) throws IllegalSymbolException { 195 getAlphabet().validate(sym); 196 if(!(sym instanceof AtomicSymbol)) { 197 throw new IllegalSymbolException( 198 "Can't add counts for ambiguity symbols. Got: " + 199 sym.getName() 200 ); 201 } 202 // FIXME: should get matches for symbol & 203 // divide count by null model ratioes. 204 List symL = ((BasisSymbol) sym).getSymbols(); 205 Symbol f = (Symbol) symL.get(0); 206 Symbol s = (Symbol) symL.get(1); 207 208 dtc.addCount(first, f, times); 209 dtc.addCount(second, s, times); 210 } 211 } 212 213 public Symbol sampleSymbol() { 214 try { 215 return getAlphabet().getSymbol(Arrays.asList( new Symbol[] { 216 first.sampleSymbol(), 217 second.sampleSymbol() 218 })); 219 } catch (IllegalSymbolException ise) { 220 throw new BioError("Couldn't sample symbol", ise); 221 } 222 } 223}