001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojavax.ga.util;
024
025import java.util.Collections;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Set;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.BioException;
032import org.biojava.bio.dist.Distribution;
033import org.biojava.bio.dist.OrderNDistribution;
034import org.biojava.bio.dist.OrderNDistributionFactory;
035import org.biojava.bio.dist.UniformDistribution;
036import org.biojava.bio.seq.io.CharacterTokenization;
037import org.biojava.bio.seq.io.SymbolTokenization;
038import org.biojava.bio.symbol.Alphabet;
039import org.biojava.bio.symbol.AlphabetIndex;
040import org.biojava.bio.symbol.AlphabetManager;
041import org.biojava.bio.symbol.AtomicSymbol;
042import org.biojava.bio.symbol.FiniteAlphabet;
043import org.biojava.bio.symbol.IllegalAlphabetException;
044import org.biojava.bio.symbol.IllegalSymbolException;
045import org.biojava.bio.symbol.SimpleAlphabet;
046import org.biojava.bio.symbol.SimpleSymbolList;
047import org.biojava.bio.symbol.SymbolList;
048import org.biojava.utils.ChangeVetoException;
049
050
051/**
052 * <p> Utility methods for the GA library
053 * 
054 * @author Mark Schreiber
055 * @version 1.0
056 * @since 1.5
057 */
058
059public final class GATools {
060
061  private static SimpleAlphabet binary;
062  private static AtomicSymbol zero;
063  private static AtomicSymbol one;
064
065  static{
066    zero = AlphabetManager.createSymbol("zero");
067    one = AlphabetManager.createSymbol("one");
068
069    Set syms = new HashSet();
070    syms.add(zero); syms.add(one);
071
072    binary = new SimpleAlphabet(syms, "GA_Binary");
073    CharacterTokenization tk = new CharacterTokenization(binary, false);
074    tk.bindSymbol(zero, '0');
075    tk.bindSymbol(one, '1');
076
077    binary.putTokenization("token", tk);
078
079    AlphabetManager.registerAlphabet(binary.getName(), binary);
080  }
081
082  /**
083   * Gets a Reference to the FlyWeight GA_Binary <code>Alphabet</code>.
084   * It contains the Symbols one and zero.
085   * @return the finite, flyweight Binary Alphabet
086   */
087  public static FiniteAlphabet getBinaryAlphabet(){
088    return binary;
089  }
090
091  /**
092   * @return the GA_Binary symbol "one"
093   */
094  public static AtomicSymbol one(){
095    return one;
096  }
097
098  /**
099   * Creates a <code>SymbolList</code> in the GABinary <code>Alphabet</code>
100   * @param binarySequence a String like "01010000101010101" with no white space
101   * @return a <code>SymbolList</code> parsed from <code>binarySequence</code>
102   * @throws IllegalSymbolException if a character other than 1 or 0 is found.
103   */
104  public static SymbolList createBinary(String binarySequence)
105       throws IllegalSymbolException{
106
107    SymbolTokenization toke = null;
108    try {
109      toke =
110          getBinaryAlphabet().getTokenization("token");
111    }
112    catch (BioException ex) {
113      throw new BioError("Cannot make binary tokenization", ex);
114    }
115
116    return new SimpleSymbolList(toke, binarySequence);
117  }
118
119  /**
120   * @return the GA_Binary symbol "zero"
121   */
122  public static AtomicSymbol zero(){
123    return zero;
124  }
125
126  /**
127   * Makes a 1st order distribution which is infact uniform (equivalent to a
128   * uniform zero order distribution).
129   * @param a the zero order Alphabet which will be multiplied into the 1st order alphabet
130   * @return the "1st order" distribution
131   * @throws IllegalAlphabetException if the Distribution cannot be constructed from <code>a</code>.
132   */
133  public static OrderNDistribution uniformMutationDistribution(FiniteAlphabet a) throws IllegalAlphabetException{
134    List l = Collections.nCopies(2, a);
135    Alphabet alpha = AlphabetManager.getCrossProductAlphabet(l);
136
137    OrderNDistribution d =
138        (OrderNDistribution)OrderNDistributionFactory.DEFAULT.createDistribution(alpha);
139
140    AlphabetIndex ind = AlphabetManager.getAlphabetIndex(a);
141    UniformDistribution u = new UniformDistribution(a);
142    for(int i = 0; i < a.size(); i++){
143      try {
144        d.setDistribution(ind.symbolForIndex(i), u);
145      }
146      catch (IllegalSymbolException ex) {
147        throw new BioError(ex); //shouldn't happen
148      }
149    }
150    return d;
151  }
152
153  /**
154   * Makes a mutation <code>Distribution</code> where the probability
155   * of a <code>Symbol</code> being mutated to itself is zero and the
156   * probability of it being changed to any other <code>Symbol</code> in
157   * the <code>Alphabet a</code> is <code>1.0 / (a.size() - 1.0)</code>
158   * @param a the <code>FiniteAlphabet</code> which mutations are sampled from.
159   * @return A <code>Distribution</code> suitable for use in a <code>MutationFunction</code>
160   * @throws IllegalAlphabetException if the <code>Distribution</code> cannot be made
161   * over the <code>FiniteAlphabet</code>
162   */
163  public static OrderNDistribution standardMutationDistribution(FiniteAlphabet a) throws IllegalAlphabetException{
164    List l = Collections.nCopies(2, a);
165    Alphabet alpha = AlphabetManager.getCrossProductAlphabet(l);
166
167    OrderNDistribution d =
168        (OrderNDistribution)OrderNDistributionFactory.DEFAULT.createDistribution(alpha);
169
170    AlphabetIndex ind = AlphabetManager.getAlphabetIndex(a);
171    for(int i = 0; i < a.size(); i++){
172      try {
173        Distribution sub_dist = d.getDistribution(ind.symbolForIndex(i));
174
175        AlphabetIndex ind2 = AlphabetManager.getAlphabetIndex(a);
176        for (int j = 0; j < a.size(); j++){
177          if(ind.symbolForIndex(i) == ind2.symbolForIndex(j)){
178            sub_dist.setWeight(ind2.symbolForIndex(j), 0.0);
179          }else{
180            sub_dist.setWeight(ind2.symbolForIndex(j), 1.0/ (double)(a.size() -1));
181          }
182        }
183      }catch (IllegalSymbolException ex) {
184        throw new BioError(ex); //shouldn't happen
185      }catch (ChangeVetoException ex){
186        throw new BioError(ex); //shouldn't happen
187      }
188    }
189    return d;
190  }
191}