001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.dist;
024
025import java.io.Serializable;
026import java.lang.ref.SoftReference;
027import java.util.Arrays;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032
033import org.biojava.bio.BioError;
034import org.biojava.bio.symbol.Alphabet;
035import org.biojava.bio.symbol.AlphabetManager;
036import org.biojava.bio.symbol.AtomicSymbol;
037import org.biojava.bio.symbol.BasisSymbol;
038import org.biojava.bio.symbol.FiniteAlphabet;
039import org.biojava.bio.symbol.IllegalAlphabetException;
040import org.biojava.bio.symbol.IllegalSymbolException;
041import org.biojava.bio.symbol.Symbol;
042import org.biojava.utils.AbstractChangeable;
043import org.biojava.utils.ChangeVetoException;
044import org.biojava.utils.ListTools;
045
046/**
 * Class for pairing up two independent distributions.
048 *
049 * @author Matthew Pocock
050 * @author Thomas Down
051 * @author Samiul Hasan
052 * @since 1.1
053 */
054
055
056public class PairDistribution
057extends AbstractChangeable
058implements Serializable, Distribution {
059  private static Map cache;
060
061  static {
062    cache = new HashMap();
063  }
064
065  /**
066   * Get a uniform null model over a PairDistribution over [first,second].
067   *
068   * @param first   the first Alphabet
069   * @param second  the second Alphabet
070   * @return    a Distribution that is a uniform distribution over the product
071   *    of first and second
072   */
073  protected static Distribution getNullModel(Distribution first, Distribution second) {
074    synchronized(cache) {
075      first = first.getNullModel();
076      second = second.getNullModel();
077      List distL = new ListTools.Doublet(first, second);
078      SoftReference ref = (SoftReference) cache.get(distL);
079      Distribution dist;
080      if(ref == null) {
081        dist = new PairDistribution(first, second);
082        cache.put(distL, new SoftReference(dist));
083      } else {
084        dist = (Distribution) ref.get();
085        if(dist == null) {
086          dist = new PairDistribution(first, second);
087          cache.put(distL, new SoftReference(dist));
088        }
089      }
090      return dist;
091    }
092  }
093
094  private Distribution first;
095  private Distribution second;
096  private Alphabet alphabet;
097
098  public Alphabet getAlphabet() {
099    return alphabet;
100  }
101
102  public Distribution getNullModel() {
103    return getNullModel(first, second);
104  }
105
106  public void setNullModel(Distribution nullModel)
107  throws IllegalAlphabetException, ChangeVetoException {
108    throw new ChangeVetoException(
109      "PairDistribution objects can't have their null models changed."
110    );
111  }
112
113  /**
114   * Register this paired distribution with a model trainer.
115   * @param trainer the trainer to register this distribution with.
116   */
117  public void registerWithTrainer(org.biojava.bio.dp.ModelTrainer trainer) {
118    trainer.registerDistribution(first);
119    trainer.registerDistribution(second);
120
121    trainer.registerTrainer(this, new PairTrainer());
122  }
123
124  public double getWeight(Symbol sym)
125  throws IllegalSymbolException {
126    if(sym instanceof BasisSymbol) {
127      List symL = ((BasisSymbol) sym).getSymbols();
128      Symbol f = (Symbol) symL.get(0);
129      Symbol s = (Symbol) symL.get(1);
130
131      return first.getWeight(f) * second.getWeight(s);
132    } else {
133      double score = 0.0;
134      for(Iterator i = ((FiniteAlphabet) sym.getMatches()).iterator();
135      i.hasNext(); ) {
136        AtomicSymbol s = (AtomicSymbol) i.next();
137        score += getWeight(s);
138      }
139      return score;
140    }
141  }
142
143  public void setWeight(Symbol sym, double weight)
144  throws ChangeVetoException {
145    throw new ChangeVetoException(
146      "Can't set the weight directly in a PairDistribution. " +
147      "You must set the weights in the underlying distributions."
148    );
149  }
150
151  /**
152   * Create a new PairDistribution that represents the product of two other
153   * distributions. The alphabet will be the product of the first and seccond
154   * distribution's alphabets, and the weights will be the products of the
155   * weights for the first and seccond distributions given the first and second
156   * component of the symbol respectively.
157   *
158   * @param first   the first distribution
159   * @param second  the second distribution
160   */
161  public PairDistribution(Distribution first, Distribution second) {
162    this.first = first;
163    this.second = second;
164    this.alphabet = AlphabetManager.getCrossProductAlphabet(
165      Arrays.asList(new Alphabet[] {
166        first.getAlphabet(), second.getAlphabet()
167      })
168    );
169  }
170
171  public void registerWithTrainer(DistributionTrainerContext dtc) {
172    dtc.registerTrainer(this, new PairTrainer());
173  }
174
175  private class PairTrainer
176  extends IgnoreCountsTrainer
177  implements Serializable {
178    public double getCount(DistributionTrainerContext dtc, AtomicSymbol as)
179    throws IllegalSymbolException {
180      getAlphabet().validate(as);
181
182      List symL = as.getSymbols();
183      Symbol f = (Symbol) symL.get(0);
184      Symbol s = (Symbol) symL.get(1);
185
186      // I don't think this is correct. Pants!
187      return
188        (dtc.getCount(first, f) + dtc.getCount(second, s)) * 0.5;
189
190    }
191
192    public void addCount(
193      DistributionTrainerContext dtc, Symbol sym, double times
194    ) throws IllegalSymbolException {
195      getAlphabet().validate(sym);
196      if(!(sym instanceof AtomicSymbol)) {
197        throw new IllegalSymbolException(
198          "Can't add counts for ambiguity symbols. Got: " +
199          sym.getName()
200        );
201      }
202      // FIXME: should get matches for symbol &
203      // divide count by null model ratioes.
204      List symL = ((BasisSymbol) sym).getSymbols();
205      Symbol f = (Symbol) symL.get(0);
206      Symbol s = (Symbol) symL.get(1);
207
208      dtc.addCount(first, f, times);
209      dtc.addCount(second, s, times);
210    }
211  }
212
213  public Symbol sampleSymbol() {
214    try {
215      return getAlphabet().getSymbol(Arrays.asList( new Symbol[] {
216        first.sampleSymbol(),
217        second.sampleSymbol()
218      }));
219    } catch (IllegalSymbolException ise) {
220      throw new BioError("Couldn't sample symbol", ise);
221    }
222  }
223}