001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.symbol; 023 024import java.util.ArrayList; 025import java.util.HashMap; 026import java.util.HashSet; 027import java.util.Iterator; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031 032import org.biojava.bio.dist.Distribution; 033import org.biojava.bio.dist.DistributionFactory; 034import org.biojava.bio.dist.DistributionTools; 035import org.biojava.bio.dist.IndexedCount; 036import org.biojava.bio.seq.ProteinTools; 037import org.biojava.bio.seq.RNATools; 038import org.biojava.utils.AbstractChangeable; 039import org.biojava.utils.ChangeListener; 040import org.biojava.utils.ChangeVetoException; 041 042/** 043 * a simple no-frills implementation of the 044 * CodonPref object that encapsulates 045 * codon preference data. 046 * 047 * @author David Huen 048 * @author gwaldon pyrrolysine 049 * @since 1.3 050 */ 051public class SimpleCodonPref 052 extends AbstractChangeable 053 implements CodonPref 054{ 055 String name; 056 String geneticCodeName; 057 Distribution codonPref; 058 059 // residue-based codon preference stats 060 Map codonPrefByResidue = null; 061 062 // codon wobble-based codon preference stats 063 Map wobbleDistributions; 064 065 public SimpleCodonPref(String geneticCodeName, Distribution codonPref, String name) 066 throws IllegalAlphabetException 067 { 068 this.name = name; 069 this.geneticCodeName = geneticCodeName; 070 this.codonPref = codonPref; 071 072 // validate the Distribution 073 if (codonPref.getAlphabet() != RNATools.getCodonAlphabet()) 074 throw new IllegalAlphabetException("codon preferences must be over codon alphabet"); 075 } 076 077 public String getName() 078 { 079 return name; 080 } 081 082 public String getGeneticCodeName() 083 { 084 return geneticCodeName; 085 } 086 087 public ManyToOneTranslationTable getGeneticCode() 088 { 089 return RNATools.getGeneticCode(geneticCodeName); 090 } 091 092 public Distribution getFrequency() 093 { 094 return codonPref; 095 } 096 097 public Distribution getFrequencyForSynonyms(Symbol residue) 098 throws IllegalSymbolException 099 { 100 if (codonPrefByResidue == null) preparePrefsByResidue(); 101 102 return (Distribution) codonPrefByResidue.get(residue); 103 } 104 105 public WobbleDistribution getWobbleDistributionForSynonyms(Symbol residue) 106 throws IllegalSymbolException 107 { 108 if (wobbleDistributions == null) preparePrefsByWobble(); 109 110 return (WobbleDistribution) wobbleDistributions.get(residue); 111 } 112 113 private void preparePrefsByResidue() 114 throws IllegalSymbolException 115 { 116 try { 117 codonPrefByResidue = new HashMap(); 118 119 // what we want is to create residue-specific distributions 120 121 for (Iterator residueI = ProteinTools.getTAlphabet().iterator(); residueI.hasNext(); ) { 122 Symbol residue = (Symbol) residueI.next(); 123 124 125 // filter out selenocysteine! 126 if (residue.getName().equals("SEC")) continue; 127 // filter out pyrrolysine! 128 if (residue.getName().equals("PYL")) continue; 129 130 // get the synonymous codons and sum their frequencies 131 double residueFreq = 0.0; 132 Set synonyms = getGeneticCode().untranslate(residue); 133 134 for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) { 135 Symbol synonym = (Symbol) synonymI.next(); 136 137 // sum frequency of synonyms for this residue 138 residueFreq += codonPref.getWeight(synonym); 139 } 140 141 // now create a new distribution over the synonyms 142 Distribution residueCodonDist = DistributionFactory.DEFAULT.createDistribution(RNATools.getCodonAlphabet()); 143 144 for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) { 145 Symbol synonym = (Symbol) synonymI.next(); 146 147 // compute the probability of the current codon 148 residueCodonDist.setWeight(synonym, codonPref.getWeight(synonym)/residueFreq); 149 } 150 151 // lock the Distribution and stash in map for later use 152 residueCodonDist.addChangeListener(ChangeListener.ALWAYS_VETO); 153 codonPrefByResidue.put(residue, residueCodonDist); 154 } 155 } 156 catch (ChangeVetoException cve) {} 157 catch (IllegalAlphabetException iae) {} // none of these should be thrown since the alphabet was preverified. 158 } 159 160 private void preparePrefsByWobble() 161 throws IllegalSymbolException 162 { 163 try { 164 wobbleDistributions = new HashMap(); 165 166 // what we want is to create residue-specific distributions 167 FiniteAlphabet nonWobbleAlfa = CodonPrefTools.getDinucleotideAlphabet(); 168 169 for (Iterator residueI = ProteinTools.getTAlphabet().iterator(); residueI.hasNext(); ) { 170 Symbol residue = (Symbol) residueI.next(); 171 172 // filter out selenocysteine! 173 if (residue.getName().equals("SEC")) continue; 174 // filter out pyrrolysine! 175 if (residue.getName().equals("PYL")) continue; 176 177 // create bins keyed on non-wobble bases 178 IndexedCount nonWobbleCounts = new IndexedCount(nonWobbleAlfa);; 179 IndexedCount wobbleCounts; 180 181 Map wobbleDists = new HashMap(); 182 Set nonWobbleBases = new HashSet(); 183 184 // get the synonymous codons 185 Set synonyms = getGeneticCode().untranslate(residue); 186 187 for (Iterator synonymI = synonyms.iterator(); synonymI.hasNext(); ) { 188 BasisSymbol synonym = (BasisSymbol) synonymI.next(); 189 190 // retrieve the non-wobble bases for these codons 191 List codonSymbols = synonym.getSymbols(); 192 AtomicSymbol wobble = (AtomicSymbol) codonSymbols.get(2); 193 194 List nonWobbleSymbols = new ArrayList(2); 195 nonWobbleSymbols.add(codonSymbols.get(0)); 196 nonWobbleSymbols.add(codonSymbols.get(1)); 197 AtomicSymbol nonWobble = (AtomicSymbol) nonWobbleAlfa.getSymbol(nonWobbleSymbols); 198 nonWobbleBases.add(nonWobble); 199 200 // add counts to the appropriate Count objects 201 double codonFreq = codonPref.getWeight(synonym); 202 nonWobbleCounts.increaseCount(nonWobble, codonFreq); 203 204 // add Counts 205 wobbleCounts = (IndexedCount) wobbleDists.get(nonWobble); 206 if (wobbleCounts == null) { 207 wobbleCounts = new IndexedCount(RNATools.getRNA()); 208 wobbleDists.put(nonWobble, wobbleCounts); 209 } 210 211 wobbleCounts.increaseCount(wobble, codonFreq); 212 } 213 214 // convert the accumulated Counts into Distributions 215 Distribution nonWobbleDist = DistributionTools.countToDistribution(nonWobbleCounts); 216 217 for (Iterator nonWobbleBasesI = nonWobbleBases.iterator(); nonWobbleBasesI.hasNext(); ) { 218 AtomicSymbol nonWobbleBase = (AtomicSymbol) nonWobbleBasesI.next(); 219 220 // retrieve and replace each Count with its corresponding Distribution 221 IndexedCount count = (IndexedCount) wobbleDists.get(nonWobbleBase); 222 223 if (count != null) { 224 Distribution wobbleDist = DistributionTools.countToDistribution(count); 225 wobbleDists.put(nonWobbleBase, wobbleDist); 226 } 227 } 228 wobbleDistributions.put(residue, new SimpleWobbleDistribution(residue, nonWobbleBases, nonWobbleDist, wobbleDists)); 229 } 230 } 231 catch (IllegalAlphabetException iae) {System.err.println("unexpected IllegalAlphabetException"); } 232 catch (ChangeVetoException cve) {System.err.println("unexpected ChangeVetoException"); } 233 } 234} 235