001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.symbol;
023
024import java.util.ArrayList;
025import java.util.HashMap;
026import java.util.HashSet;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031
032import org.biojava.bio.dist.Distribution;
033import org.biojava.bio.dist.DistributionFactory;
034import org.biojava.bio.dist.DistributionTools;
035import org.biojava.bio.dist.IndexedCount;
036import org.biojava.bio.seq.ProteinTools;
037import org.biojava.bio.seq.RNATools;
038import org.biojava.utils.AbstractChangeable;
039import org.biojava.utils.ChangeListener;
040import org.biojava.utils.ChangeVetoException;
041
042/**
043 * a simple no-frills implementation of the
044 * CodonPref object that encapsulates
045 * codon preference data.
046 *
047 * @author David Huen
048 * @author gwaldon pyrrolysine
049 * @since 1.3
050 */
051public class SimpleCodonPref
052    extends AbstractChangeable
053    implements CodonPref
054{
055    String name;
056    String geneticCodeName;
057    Distribution codonPref;
058
059    // residue-based codon preference stats
060    Map codonPrefByResidue = null;
061
062    // codon wobble-based codon preference stats
063    Map wobbleDistributions;
064
065    public SimpleCodonPref(String geneticCodeName, Distribution codonPref, String name)
066        throws IllegalAlphabetException
067    {
068        this.name = name;
069        this.geneticCodeName = geneticCodeName;
070        this.codonPref = codonPref;
071
072        // validate the Distribution
073        if (codonPref.getAlphabet() != RNATools.getCodonAlphabet())
074            throw new IllegalAlphabetException("codon preferences must be over codon alphabet");
075    }
076
077    public String getName()
078    {
079        return name;
080    }
081
082    public String getGeneticCodeName()
083    {
084        return geneticCodeName;
085    }
086
087    public ManyToOneTranslationTable getGeneticCode()
088    {
089        return RNATools.getGeneticCode(geneticCodeName);
090    }
091
092    public Distribution getFrequency()
093    {
094        return codonPref;
095    }
096
097    public Distribution getFrequencyForSynonyms(Symbol residue)
098        throws IllegalSymbolException
099    {
100        if (codonPrefByResidue == null) preparePrefsByResidue();
101
102        return (Distribution) codonPrefByResidue.get(residue);
103    }
104
105    public WobbleDistribution getWobbleDistributionForSynonyms(Symbol residue)
106        throws IllegalSymbolException
107    {
108        if (wobbleDistributions == null) preparePrefsByWobble();
109
110        return (WobbleDistribution) wobbleDistributions.get(residue);
111    }
112
113    private void preparePrefsByResidue()
114        throws IllegalSymbolException
115    {
116        try {
117            codonPrefByResidue = new HashMap();
118
119            // what we want is to create residue-specific distributions
120
121            for (Iterator residueI = ProteinTools.getTAlphabet().iterator(); residueI.hasNext(); ) {
122                Symbol residue = (Symbol) residueI.next();
123
124                
125                // filter out selenocysteine!
126                if (residue.getName().equals("SEC")) continue;
127                // filter out pyrrolysine!
128                if (residue.getName().equals("PYL")) continue;
129                
130                // get the synonymous codons and sum their frequencies
131                double residueFreq = 0.0;
132                Set synonyms = getGeneticCode().untranslate(residue);
133
134                for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) {
135                     Symbol synonym = (Symbol) synonymI.next();
136
137                    // sum frequency of synonyms for this residue
138                    residueFreq += codonPref.getWeight(synonym);
139                }
140
141                // now create a new distribution over the synonyms
142                Distribution residueCodonDist = DistributionFactory.DEFAULT.createDistribution(RNATools.getCodonAlphabet());
143
144                for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) {
145                    Symbol synonym = (Symbol) synonymI.next();
146
147                    // compute the probability of the current codon
148                    residueCodonDist.setWeight(synonym, codonPref.getWeight(synonym)/residueFreq);            
149                }
150
151                // lock the Distribution and stash in map for later use
152                residueCodonDist.addChangeListener(ChangeListener.ALWAYS_VETO);
153                codonPrefByResidue.put(residue, residueCodonDist);
154            }
155        }
156        catch (ChangeVetoException cve) {}
157        catch (IllegalAlphabetException iae) {} // none of these should be thrown since the alphabet was preverified.
158    }
159
160    private void preparePrefsByWobble()
161        throws IllegalSymbolException
162    {
163        try {
164            wobbleDistributions = new HashMap();
165
166            // what we want is to create residue-specific distributions
167            FiniteAlphabet nonWobbleAlfa = CodonPrefTools.getDinucleotideAlphabet();
168
169            for (Iterator residueI = ProteinTools.getTAlphabet().iterator(); residueI.hasNext(); ) {
170                Symbol residue = (Symbol) residueI.next();
171
172                // filter out selenocysteine!
173                if (residue.getName().equals("SEC")) continue;
174                // filter out pyrrolysine!
175                if (residue.getName().equals("PYL")) continue;
176                
177                // create bins keyed on non-wobble bases
178                IndexedCount nonWobbleCounts = new IndexedCount(nonWobbleAlfa);;
179                IndexedCount wobbleCounts;
180
181                Map wobbleDists = new HashMap();
182                Set nonWobbleBases = new HashSet();
183
184                // get the synonymous codons
185                Set synonyms = getGeneticCode().untranslate(residue);
186
187                for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) {
188                     BasisSymbol synonym = (BasisSymbol) synonymI.next();
189
190                     // retrieve the non-wobble bases for these codons
191                     List codonSymbols = synonym.getSymbols();
192                     AtomicSymbol wobble = (AtomicSymbol) codonSymbols.get(2);
193
194                     List nonWobbleSymbols = new ArrayList(2);
195                     nonWobbleSymbols.add(codonSymbols.get(0));
196                     nonWobbleSymbols.add(codonSymbols.get(1));
197                     AtomicSymbol nonWobble = (AtomicSymbol) nonWobbleAlfa.getSymbol(nonWobbleSymbols);
198                     nonWobbleBases.add(nonWobble);
199
200                     // add counts to the appropriate Count objects
201                     double codonFreq = codonPref.getWeight(synonym);
202                     nonWobbleCounts.increaseCount(nonWobble, codonFreq);
203
204                     // add Counts
205                     wobbleCounts = (IndexedCount) wobbleDists.get(nonWobble);
206                     if (wobbleCounts == null) {
207                         wobbleCounts = new IndexedCount(RNATools.getRNA());
208                         wobbleDists.put(nonWobble, wobbleCounts);
209                     }
210
211                     wobbleCounts.increaseCount(wobble, codonFreq);
212                }
213
214                // convert the accumulated Counts into Distributions
215                Distribution nonWobbleDist = DistributionTools.countToDistribution(nonWobbleCounts);
216
217                for (Iterator nonWobbleBasesI = nonWobbleBases.iterator(); nonWobbleBasesI.hasNext(); ) {
218                    AtomicSymbol nonWobbleBase = (AtomicSymbol) nonWobbleBasesI.next();
219
220                    // retrieve and replace each Count with its corresponding Distribution
221                    IndexedCount count = (IndexedCount) wobbleDists.get(nonWobbleBase);
222
223                    if (count != null) {
224                        Distribution wobbleDist = DistributionTools.countToDistribution(count);
225                        wobbleDists.put(nonWobbleBase, wobbleDist);
226                    }
227                }
228                wobbleDistributions.put(residue, new SimpleWobbleDistribution(residue, nonWobbleBases, nonWobbleDist, wobbleDists));
229            }
230        }
231        catch (IllegalAlphabetException iae) {System.err.println("unexpected IllegalAlphabetException"); }
232        catch (ChangeVetoException cve) {System.err.println("unexpected ChangeVetoException"); }
233    }
234}
235