001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.transcription;
023
024import org.biojava.nbio.core.sequence.compound.*;
025import org.biojava.nbio.core.sequence.io.IUPACParser;
026import org.biojava.nbio.core.sequence.io.IUPACParser.IUPACTable;
027import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
028import org.biojava.nbio.core.sequence.io.RNASequenceCreator;
029import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
030import org.biojava.nbio.core.sequence.template.CompoundSet;
031import org.biojava.nbio.core.sequence.template.Sequence;
032import org.biojava.nbio.core.sequence.transcription.Table.Codon;
033
034import java.util.EnumMap;
035import java.util.Map;
036
037/**
038 * Used as a way of encapsulating the data structures required to parse DNA to a
039 * Protein sequence.
040 *
041 * In order to build one look at @ TranscriptionEngine.Builder} which provides
042 * intelligent defaults and allows you to build an engine which is nearly the same
043 * as the default one but with a few changes. All of the engine is customisable.
044 *
045 * By default the code will attempt to:
046 *
047 * <ul>
048 * <li>Trim Stops</li>
049 * <li>Convert initiating codons to M</li>
050 * <li>Allow for the fuzzy translation of Codons i.e. if it contains an N that
051 * produces a {@link Sequence<AminoAcidCompound>} with an X at
052 * that position
053 * </ul>
054 *
055 * @author ayates
056 */
057public class TranscriptionEngine {
058
059        private static final class IOD {
060
061                public static final TranscriptionEngine INSTANCE = new TranscriptionEngine.Builder()
062                                .build();
063        }
064
065        /**
066         * Default instance to use when Transcribing from DNA -&gt; RNA -&gt;
067         * Protein. If you require anything that is not a default setting then look
068         * at @ TranscriptionEngine.Builder} for customisation options.
069         */
070        public static TranscriptionEngine getDefault() {
071                return IOD.INSTANCE;
072        }
073
074        private final Table table;
075        private final RNAToAminoAcidTranslator rnaAminoAcidTranslator;
076        private final DNAToRNATranslator dnaRnaTranslator;
077        private final SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
078        private final SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
079        private final CompoundSet<NucleotideCompound> dnaCompounds;
080        private final CompoundSet<NucleotideCompound> rnaCompounds;
081        private final CompoundSet<AminoAcidCompound> aminoAcidCompounds;
082
083        private TranscriptionEngine(Table table,
084                        RNAToAminoAcidTranslator rnaAminoAcidTranslator,
085                        DNAToRNATranslator dnaRnaTranslator,
086                        SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator,
087                        SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator,
088                        CompoundSet<NucleotideCompound> dnaCompounds,
089                        CompoundSet<NucleotideCompound> rnaCompounds,
090                        CompoundSet<AminoAcidCompound> aminoAcidCompounds) {
091                this.table = table;
092                this.rnaAminoAcidTranslator = rnaAminoAcidTranslator;
093                this.dnaRnaTranslator = dnaRnaTranslator;
094                this.proteinSequenceCreator = proteinSequenceCreator;
095                this.rnaSequenceCreator = rnaSequenceCreator;
096                this.dnaCompounds = dnaCompounds;
097                this.rnaCompounds = rnaCompounds;
098                this.aminoAcidCompounds = aminoAcidCompounds;
099        }
100
101        /**
102         * Quick method to let you go from a CDS to a Peptide quickly. It assumes
103         * you are translating only in the first frame
104         *
105         * @param dna
106         *            The CDS to translate
107         * @return The Protein Sequence
108         */
109        public Sequence<AminoAcidCompound> translate(
110                        Sequence<NucleotideCompound> dna) {
111                Map<Frame, Sequence<AminoAcidCompound>> trans = multipleFrameTranslation(
112                                dna, Frame.ONE);
113                return trans.get(Frame.ONE);
114        }
115
116        /**
117         * A way of translating DNA in a number of frames
118         *
119         * @param dna
120         *            The CDS to translate
121         * @param frames
122         *            The Frames to translate in
123         * @return All generated protein sequences in the given frames. Can have
124         *         null entries
125         */
126        public Map<Frame, Sequence<AminoAcidCompound>> multipleFrameTranslation(
127                        Sequence<NucleotideCompound> dna, Frame... frames) {
128                Map<Frame, Sequence<AminoAcidCompound>> results = new EnumMap<>(
129                                Frame.class);
130                for (Frame frame : frames) {
131                        Sequence<NucleotideCompound> rna = getDnaRnaTranslator()
132                                        .createSequence(dna, frame);
133                        Sequence<AminoAcidCompound> peptide = getRnaAminoAcidTranslator()
134                                        .createSequence(rna);
135                        results.put(frame, peptide);
136                }
137                return results;
138        }
139
140        public Table getTable() {
141                return table;
142        }
143
144        public RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
145                return rnaAminoAcidTranslator;
146        }
147
148        public DNAToRNATranslator getDnaRnaTranslator() {
149                return dnaRnaTranslator;
150        }
151
152        public SequenceCreatorInterface<AminoAcidCompound> getProteinSequenceCreator() {
153                return proteinSequenceCreator;
154        }
155
156        public SequenceCreatorInterface<NucleotideCompound> getRnaSequenceCreator() {
157                return rnaSequenceCreator;
158        }
159
160        public CompoundSet<NucleotideCompound> getDnaCompounds() {
161                return dnaCompounds;
162        }
163
164        public CompoundSet<NucleotideCompound> getRnaCompounds() {
165                return rnaCompounds;
166        }
167
168        public CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
169                return aminoAcidCompounds;
170        }
171
172        /**
173         * This class is the way to create a {@link TranscriptionEngine}.
174         */
175        public static class Builder {
176
177                private Table table;
178                private RNAToAminoAcidTranslator rnaAminoAcidTranslator;
179                private DNAToRNATranslator dnaRnaTranslator;
180                private SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
181                private SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
182                private CompoundSet<NucleotideCompound> dnaCompounds;
183                private CompoundSet<NucleotideCompound> rnaCompounds;
184                private CompoundSet<AminoAcidCompound> aminoAcidCompounds;
185                private boolean initMet = true;
186                private boolean trimStop = true;
187                private boolean translateNCodons = true;
188                private boolean decorateRna = false;
189                // Set at false for backwards compatibility
190                private boolean stopAtStopCodons = false;
191                private boolean waitForStartCodon = false;
192
193                /**
194                 * The method to finish any calls to the builder with which returns a
195                 * transcription engine. The engine is designed to provide everything
196                 * required for transcription to those classes which will do the
197                 * transcription.
198                 */
199                public TranscriptionEngine build() {
200                        return new TranscriptionEngine(getTable(),
201                                        getRnaAminoAcidTranslator(), getDnaRnaTranslator(),
202                                        getProteinCreator(), getRnaCreator(), getDnaCompounds(),
203                                        getRnaCompounds(), getAminoAcidCompounds());
204                }
205
206                // ---- START OF BUILDER METHODS
207                /**
208                 * Uses the static instance of {@link IUPACParser} to find instances of
209                 * {@link IUPACTable}s by ID.
210                 */
211                public Builder table(Integer id) {
212                        table = IUPACParser.getInstance().getTable(id);
213                        return this;
214                }
215
216                /**
217                 * Uses the static instance of {@link IUPACParser} to find instances of
218                 * {@link IUPACTable}s by its String name
219                 */
220                public Builder table(String name) {
221                        table = IUPACParser.getInstance().getTable(name);
222                        return this;
223                }
224
225                public Builder table(Table table) {
226                        this.table = table;
227                        return this;
228                }
229
230                public Builder dnaCompounds(CompoundSet<NucleotideCompound> compounds) {
231                        this.dnaCompounds = compounds;
232                        return this;
233                }
234
235                public Builder rnaCompounds(CompoundSet<NucleotideCompound> compounds) {
236                        this.rnaCompounds = compounds;
237                        return this;
238                }
239
240                public Builder aminoAcidsCompounds(
241                                CompoundSet<AminoAcidCompound> compounds) {
242                        this.aminoAcidCompounds = compounds;
243                        return this;
244                }
245
246                public Builder dnaRnaTranslator(DNAToRNATranslator translator) {
247                        this.dnaRnaTranslator = translator;
248                        return this;
249                }
250
251                public Builder rnaAminoAcidTranslator(
252                                RNAToAminoAcidTranslator translator) {
253                        this.rnaAminoAcidTranslator = translator;
254                        return this;
255                }
256
257                public Builder proteinCreator(
258                                SequenceCreatorInterface<AminoAcidCompound> creator) {
259                        this.proteinSequenceCreator = creator;
260                        return this;
261                }
262
263                public Builder rnaCreator(
264                                SequenceCreatorInterface<NucleotideCompound> creator) {
265                        this.rnaSequenceCreator = creator;
266                        return this;
267                }
268
269                public Builder initMet(boolean initMet) {
270                        this.initMet = initMet;
271                        return this;
272                }
273
274                public Builder trimStop(boolean trimStop) {
275                        this.trimStop = trimStop;
276                        return this;
277                }
278
279                public Builder translateNCodons(boolean translateNCodons) {
280                        this.translateNCodons = translateNCodons;
281                        return this;
282                }
283
284                /**
285                 * If set, then the last codon translated in the resulting peptide
286                 * sequence will be the stop codon
287                 */
288                public Builder stopAtStopCodons(boolean stopAtStopCodons) {
289                        this.stopAtStopCodons = stopAtStopCodons;
290                        return this;
291                }
292
293                /**
294                 * If set, then translation will not start until a start codon is
295                 * encountered
296                 */
297                public Builder waitForStartCodon(boolean waitForStartCodon) {
298                        this.waitForStartCodon = waitForStartCodon;
299                        return this;
300                }
301
302                /**
303                 * Performs an optimisation where RNASequences are not translated into
304                 * their own objects but are views onto the base DNA sequence.
305                 */
306                public Builder decorateRna(boolean decorateRna) {
307                        this.decorateRna = decorateRna;
308                        return this;
309                }
310
311                // ------ INTERNAL BUILDERS with defaults if exists
312                private CompoundSet<NucleotideCompound> getDnaCompounds() {
313                        if (dnaCompounds != null) {
314                                return dnaCompounds;
315                        }
316                        return AmbiguityDNACompoundSet.getDNACompoundSet();
317                }
318
319                private CompoundSet<NucleotideCompound> getRnaCompounds() {
320                        if (rnaCompounds != null) {
321                                return rnaCompounds;
322                        }
323                        return AmbiguityRNACompoundSet.getRNACompoundSet();
324                }
325
326                private CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
327                        if (aminoAcidCompounds != null) {
328                                return aminoAcidCompounds;
329                        }
330                        return AminoAcidCompoundSet.getAminoAcidCompoundSet();
331                }
332
333                private DNAToRNATranslator getDnaRnaTranslator() {
334                        if (dnaRnaTranslator != null) {
335                                return dnaRnaTranslator;
336                        }
337                        return new DNAToRNATranslator(new RNASequenceCreator(
338                                        getRnaCompounds()), getDnaCompounds(), getRnaCompounds(),
339                                        isDecorateRna());
340                }
341
342                private RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
343                        if (rnaAminoAcidTranslator != null) {
344                                return rnaAminoAcidTranslator;
345                        }
346                        return new RNAToAminoAcidTranslator(getProteinCreator(),
347                                        getRnaCompounds(), getCodons(), getAminoAcidCompounds(),
348                                        getTable(), isTrimStop(), isInitMet(),
349                                        isTranslateNCodons(), isStopAtStopCodons(),
350                                        isWaitForStartCodon());
351                }
352
353                private CompoundSet<Codon> getCodons() {
354                        return getTable().getCodonCompoundSet(getRnaCompounds(),
355                                        getAminoAcidCompounds());
356                }
357
358                private SequenceCreatorInterface<AminoAcidCompound> getProteinCreator() {
359                        if (proteinSequenceCreator != null) {
360                                return proteinSequenceCreator;
361                        }
362                        return new ProteinSequenceCreator(getAminoAcidCompounds());
363                }
364
365                private SequenceCreatorInterface<NucleotideCompound> getRnaCreator() {
366                        if (rnaSequenceCreator != null) {
367                                return rnaSequenceCreator;
368                        }
369                        return new RNASequenceCreator(getRnaCompounds());
370                }
371
372                private Table getTable() {
373                        if (table != null) {
374                                return table;
375                        }
376                        table(1); // Will set table to default IUPAC codee
377                        return table;
378                }
379
380                private boolean isInitMet() {
381                        return initMet;
382                }
383
384                private boolean isTrimStop() {
385                        return trimStop;
386                }
387
388                private boolean isTranslateNCodons() {
389                        return translateNCodons;
390                }
391
392                private boolean isDecorateRna() {
393                        return decorateRna;
394                }
395
396                private boolean isStopAtStopCodons() {
397                        return stopAtStopCodons;
398                }
399
400                private boolean isWaitForStartCodon() {
401                        return waitForStartCodon;
402                }
403        }
404}