/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 */
package org.biojava.nbio.core.sequence.transcription;

import org.biojava.nbio.core.sequence.compound.*;
import org.biojava.nbio.core.sequence.io.IUPACParser;
import org.biojava.nbio.core.sequence.io.IUPACParser.IUPACTable;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.io.RNASequenceCreator;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.core.sequence.transcription.Table.Codon;

import java.util.EnumMap;
import java.util.Map;

/**
 * Used as a way of encapsulating the data structures required to parse DNA to a
 * Protein sequence.
 *
 * <p>
 * In order to build one look at {@link TranscriptionEngine.Builder} which
 * provides intelligent defaults and allows you to build an engine which is
 * nearly the same as the default one but with a few changes. All of the engine
 * is customisable.
 *
 * <p>
 * By default the code will attempt to:
 *
 * <ul>
 * <li>Trim Stops</li>
 * <li>Convert initiating codons to M</li>
 * <li>Allow for the fuzzy translation of Codons i.e. if it contains an N that
 * produces a {@link Sequence}&lt;{@link AminoAcidCompound}&gt; with an X at
 * that position</li>
 * </ul>
 *
 * @author ayates
 */
public class TranscriptionEngine {

    /**
     * Holder for the shared default engine. The instance is created when this
     * nested class is initialised, which happens on the first call to
     * {@link TranscriptionEngine#getDefault()}.
     */
    private static final class IOD {

        public static final TranscriptionEngine INSTANCE = new TranscriptionEngine.Builder()
                .build();
    }

    /**
     * Default instance to use when Transcribing from
     * {@code DNA -> RNA -> Protein}. If you require anything that is not a
     * default setting then look at {@link TranscriptionEngine.Builder} for
     * customisation options.
     *
     * @return the shared engine produced by an unconfigured
     *         {@link TranscriptionEngine.Builder}
     */
    public static TranscriptionEngine getDefault() {
        return IOD.INSTANCE;
    }

    private final Table table;
    private final RNAToAminoAcidTranslator rnaAminoAcidTranslator;
    private final DNAToRNATranslator dnaRnaTranslator;
    private final SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
    private final SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
    private final CompoundSet<NucleotideCompound> dnaCompounds;
    private final CompoundSet<NucleotideCompound> rnaCompounds;
    private final CompoundSet<AminoAcidCompound> aminoAcidCompounds;

    /**
     * Stores the supplied collaborators as-is. Only reachable through
     * {@link Builder#build()}, which is responsible for filling in defaults.
     */
    private TranscriptionEngine(Table table,
            RNAToAminoAcidTranslator rnaAminoAcidTranslator,
            DNAToRNATranslator dnaRnaTranslator,
            SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator,
            SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator,
            CompoundSet<NucleotideCompound> dnaCompounds,
            CompoundSet<NucleotideCompound> rnaCompounds,
            CompoundSet<AminoAcidCompound> aminoAcidCompounds) {
        this.table = table;
        this.rnaAminoAcidTranslator = rnaAminoAcidTranslator;
        this.dnaRnaTranslator = dnaRnaTranslator;
        this.proteinSequenceCreator = proteinSequenceCreator;
        this.rnaSequenceCreator = rnaSequenceCreator;
        this.dnaCompounds = dnaCompounds;
        this.rnaCompounds = rnaCompounds;
        this.aminoAcidCompounds = aminoAcidCompounds;
    }

    /**
     * Quick method to let you go from a CDS to a Peptide quickly. It assumes
     * you are translating only in the first frame
     *
     * @param dna
     *            The CDS to translate
     * @return The Protein Sequence
     */
    public Sequence<AminoAcidCompound> translate(
            Sequence<NucleotideCompound> dna) {
        Map<Frame, Sequence<AminoAcidCompound>> trans = multipleFrameTranslation(
                dna, Frame.ONE);
        return trans.get(Frame.ONE);
    }

    /**
     * A way of translating DNA in a number of frames. Each requested frame is
     * first passed through the DNA to RNA translator and the resulting RNA is
     * then passed through the RNA to amino acid translator.
     *
     * @param dna
     *            The CDS to translate
     * @param frames
     *            The Frames to translate in
     * @return All generated protein sequences in the given frames. Can have
     *         null entries
     */
    public Map<Frame, Sequence<AminoAcidCompound>> multipleFrameTranslation(
            Sequence<NucleotideCompound> dna, Frame... frames) {
        Map<Frame, Sequence<AminoAcidCompound>> results = new EnumMap<Frame, Sequence<AminoAcidCompound>>(
                Frame.class);
        for (Frame frame : frames) {
            Sequence<NucleotideCompound> rna = getDnaRnaTranslator()
                    .createSequence(dna, frame);
            Sequence<AminoAcidCompound> peptide = getRnaAminoAcidTranslator()
                    .createSequence(rna);
            results.put(frame, peptide);
        }
        return results;
    }

    /**
     * @return the codon table this engine was built with
     */
    public Table getTable() {
        return table;
    }

    /**
     * @return the translator used to turn RNA into amino acids
     */
    public RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
        return rnaAminoAcidTranslator;
    }

    /**
     * @return the translator used to turn DNA into RNA
     */
    public DNAToRNATranslator getDnaRnaTranslator() {
        return dnaRnaTranslator;
    }

    /**
     * @return the protein sequence creator this engine was built with
     */
    public SequenceCreatorInterface<AminoAcidCompound> getProteinSequenceCreator() {
        return proteinSequenceCreator;
    }

    /**
     * @return the RNA sequence creator this engine was built with
     */
    public SequenceCreatorInterface<NucleotideCompound> getRnaSequenceCreator() {
        return rnaSequenceCreator;
    }

    /**
     * @return the DNA compound set this engine was built with
     */
    public CompoundSet<NucleotideCompound> getDnaCompounds() {
        return dnaCompounds;
    }

    /**
     * @return the RNA compound set this engine was built with
     */
    public CompoundSet<NucleotideCompound> getRnaCompounds() {
        return rnaCompounds;
    }

    /**
     * @return the amino acid compound set this engine was built with
     */
    public CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
        return aminoAcidCompounds;
    }

    /**
     * This class is the way to create a {@link TranscriptionEngine}.
     *
     * <p>
     * Every setter returns this builder so calls can be chained. Anything left
     * unset is replaced by a default when {@link #build()} is called. The
     * boolean flags and the codon table are only consulted when the builder
     * creates the corresponding default translator; they have no effect on a
     * translator supplied through {@link #dnaRnaTranslator(DNAToRNATranslator)}
     * or {@link #rnaAminoAcidTranslator(RNAToAminoAcidTranslator)}.
     */
    public static class Builder {

        private Table table;
        private RNAToAminoAcidTranslator rnaAminoAcidTranslator;
        private DNAToRNATranslator dnaRnaTranslator;
        private SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
        private SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
        private CompoundSet<NucleotideCompound> dnaCompounds;
        private CompoundSet<NucleotideCompound> rnaCompounds;
        private CompoundSet<AminoAcidCompound> aminoAcidCompounds;
        private boolean initMet = true;
        private boolean trimStop = true;
        private boolean translateNCodons = true;
        private boolean decorateRna = false;
        // Set at false for backwards compatibility
        private boolean stopAtStopCodons = false;
        private boolean waitForStartCodon = false;

        /**
         * The method to finish any calls to the builder with which returns a
         * transcription engine. The engine is designed to provide everything
         * required for transcription to those classes which will do the
         * transcription.
         *
         * @return a new engine assembled from the configured values, with
         *         defaults substituted for anything that was not set
         */
        public TranscriptionEngine build() {
            return new TranscriptionEngine(getTable(),
                    getRnaAminoAcidTranslator(), getDnaRnaTranslator(),
                    getProteinCreator(), getRnaCreator(), getDnaCompounds(),
                    getRnaCompounds(), getAminoAcidCompounds());
        }

        // ---- START OF BUILDER METHODS
        /**
         * Uses the static instance of {@link IUPACParser} to find instances of
         * {@link IUPACTable}s by ID.
         *
         * @param id
         *            identifier handed to {@link IUPACParser}
         * @return this builder
         */
        public Builder table(Integer id) {
            table = IUPACParser.getInstance().getTable(id);
            return this;
        }

        /**
         * Uses the static instance of {@link IUPACParser} to find instances of
         * {@link IUPACTable}s by its String name
         *
         * @param name
         *            name handed to {@link IUPACParser}
         * @return this builder
         */
        public Builder table(String name) {
            table = IUPACParser.getInstance().getTable(name);
            return this;
        }

        /**
         * Uses the given codon table directly.
         *
         * @param table
         *            the table to use; when {@code null} the default table is
         *            looked up at build time
         * @return this builder
         */
        public Builder table(Table table) {
            this.table = table;
            return this;
        }

        /**
         * Sets the DNA compound set. When unset,
         * {@code AmbiguityDNACompoundSet.getDNACompoundSet()} is used.
         *
         * @return this builder
         */
        public Builder dnaCompounds(CompoundSet<NucleotideCompound> compounds) {
            this.dnaCompounds = compounds;
            return this;
        }

        /**
         * Sets the RNA compound set. When unset,
         * {@code AmbiguityRNACompoundSet.getRNACompoundSet()} is used.
         *
         * @return this builder
         */
        public Builder rnaCompounds(CompoundSet<NucleotideCompound> compounds) {
            this.rnaCompounds = compounds;
            return this;
        }

        /**
         * Sets the amino acid compound set. When unset,
         * {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()} is used.
         *
         * @return this builder
         */
        public Builder aminoAcidsCompounds(
                CompoundSet<AminoAcidCompound> compounds) {
            this.aminoAcidCompounds = compounds;
            return this;
        }

        /**
         * Supplies a ready-made DNA to RNA translator, bypassing the default
         * one this builder would otherwise construct.
         *
         * @return this builder
         */
        public Builder dnaRnaTranslator(DNAToRNATranslator translator) {
            this.dnaRnaTranslator = translator;
            return this;
        }

        /**
         * Supplies a ready-made RNA to amino acid translator, bypassing the
         * default one this builder would otherwise construct.
         *
         * @return this builder
         */
        public Builder rnaAminoAcidTranslator(
                RNAToAminoAcidTranslator translator) {
            this.rnaAminoAcidTranslator = translator;
            return this;
        }

        /**
         * Sets the creator used for protein sequences. When unset, a
         * {@link ProteinSequenceCreator} over the amino acid compound set is
         * used.
         *
         * @return this builder
         */
        public Builder proteinCreator(
                SequenceCreatorInterface<AminoAcidCompound> creator) {
            this.proteinSequenceCreator = creator;
            return this;
        }

        /**
         * Sets the creator used for RNA sequences. When unset, a
         * {@link RNASequenceCreator} over the RNA compound set is used.
         *
         * @return this builder
         */
        public Builder rnaCreator(
                SequenceCreatorInterface<NucleotideCompound> creator) {
            this.rnaSequenceCreator = creator;
            return this;
        }

        /**
         * Sets the {@code initMet} flag passed to the default
         * {@link RNAToAminoAcidTranslator}; defaults to {@code true}. Going by
         * the engine's class documentation this corresponds to converting
         * initiating codons to M.
         *
         * @return this builder
         */
        public Builder initMet(boolean initMet) {
            this.initMet = initMet;
            return this;
        }

        /**
         * Sets the {@code trimStop} flag passed to the default
         * {@link RNAToAminoAcidTranslator}; defaults to {@code true}. Going by
         * the engine's class documentation this corresponds to trimming stops.
         *
         * @return this builder
         */
        public Builder trimStop(boolean trimStop) {
            this.trimStop = trimStop;
            return this;
        }

        /**
         * Sets the {@code translateNCodons} flag passed to the default
         * {@link RNAToAminoAcidTranslator}; defaults to {@code true}. Going by
         * the engine's class documentation this corresponds to the fuzzy
         * translation of codons containing an N.
         *
         * @return this builder
         */
        public Builder translateNCodons(boolean translateNCodons) {
            this.translateNCodons = translateNCodons;
            return this;
        }

        /**
         * If set, then the last codon translated in the resulting peptide
         * sequence will be the stop codon. Defaults to {@code false}.
         *
         * @return this builder
         */
        public Builder stopAtStopCodons(boolean stopAtStopCodons) {
            this.stopAtStopCodons = stopAtStopCodons;
            return this;
        }

        /**
         * If set, then translation will not start until a start codon is
         * encountered. Defaults to {@code false}.
         *
         * @return this builder
         */
        public Builder waitForStartCodon(boolean waitForStartCodon) {
            this.waitForStartCodon = waitForStartCodon;
            return this;
        }

        /**
         * Performs an optimisation where RNASequences are not translated into
         * their own objects but are views onto the base DNA sequence. Defaults
         * to {@code false}.
         *
         * @return this builder
         */
        public Builder decorateRna(boolean decorateRna) {
            this.decorateRna = decorateRna;
            return this;
        }

        // ------ INTERNAL BUILDERS with defaults if exists
        private CompoundSet<NucleotideCompound> getDnaCompounds() {
            if (dnaCompounds != null) {
                return dnaCompounds;
            }
            return AmbiguityDNACompoundSet.getDNACompoundSet();
        }

        private CompoundSet<NucleotideCompound> getRnaCompounds() {
            if (rnaCompounds != null) {
                return rnaCompounds;
            }
            return AmbiguityRNACompoundSet.getRNACompoundSet();
        }

        private CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
            if (aminoAcidCompounds != null) {
                return aminoAcidCompounds;
            }
            return AminoAcidCompoundSet.getAminoAcidCompoundSet();
        }

        private DNAToRNATranslator getDnaRnaTranslator() {
            if (dnaRnaTranslator != null) {
                return dnaRnaTranslator;
            }
            // Go through getRnaCreator() so that a creator supplied via
            // rnaCreator(...) is honoured, mirroring how the amino acid
            // translator below uses getProteinCreator(). Without a custom
            // creator this still yields new RNASequenceCreator(getRnaCompounds()).
            return new DNAToRNATranslator(getRnaCreator(), getDnaCompounds(),
                    getRnaCompounds(), isDecorateRna());
        }

        private RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
            if (rnaAminoAcidTranslator != null) {
                return rnaAminoAcidTranslator;
            }
            return new RNAToAminoAcidTranslator(getProteinCreator(),
                    getRnaCompounds(), getCodons(), getAminoAcidCompounds(),
                    getTable(), isTrimStop(), isInitMet(),
                    isTranslateNCodons(), isStopAtStopCodons(),
                    isWaitForStartCodon());
        }

        private CompoundSet<Codon> getCodons() {
            return getTable().getCodonCompoundSet(getRnaCompounds(),
                    getAminoAcidCompounds());
        }

        private SequenceCreatorInterface<AminoAcidCompound> getProteinCreator() {
            if (proteinSequenceCreator != null) {
                return proteinSequenceCreator;
            }
            return new ProteinSequenceCreator(getAminoAcidCompounds());
        }

        private SequenceCreatorInterface<NucleotideCompound> getRnaCreator() {
            if (rnaSequenceCreator != null) {
                return rnaSequenceCreator;
            }
            return new RNASequenceCreator(getRnaCompounds());
        }

        private Table getTable() {
            if (table == null) {
                // Will set table to default IUPAC code; the result is stored
                // on the builder so later calls reuse it.
                table(1);
            }
            return table;
        }

        private boolean isInitMet() {
            return initMet;
        }

        private boolean isTrimStop() {
            return trimStop;
        }

        private boolean isTranslateNCodons() {
            return translateNCodons;
        }

        private boolean isDecorateRna() {
            return decorateRna;
        }

        private boolean isStopAtStopCodons() {
            return stopAtStopCodons;
        }

        private boolean isWaitForStartCodon() {
            return waitForStartCodon;
        }
    }
}