Source code

001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023package org.biojava.nbio.core.sequence;
024
025
026import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
027import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
028import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
029import org.biojava.nbio.core.sequence.template.CompoundSet;
030
031/**
032 * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence}
033 * where a TranscriptSequence is the child of a GeneSequence
034 * Not important for protein construction but the phase is used if outputting the gene
035 * to a gff3 file. <a href="http://www.sequenceontology.org/gff3.shtml">http://www.sequenceontology.org/gff3.shtml</a>
036 * @author Scooter Willis
037 */
038public class CDSSequence extends DNASequence {
039
040        //private static final Logger log = Logger.getLogger(CDSSequence.class.getName());
041        Integer phase = 0; // 0, 1, 2
042        TranscriptSequence parentTranscriptSequence;
043
044        /**
045         *
046         * @param parentSequence
047         * @param bioBegin
048         * @param bioEnd
049         * @param phase
050         * @throws  IllegalArgumentException if  parentSequence is incompatible with DNACompoundSet
051         */
052        public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) {
053                setCompoundSet(DNACompoundSet.getDNACompoundSet());
054                try {
055                        initSequenceStorage(parentSequence.getSequenceAsString());
056                } catch (CompoundNotFoundException e) {
057                        throw new IllegalArgumentException(e);
058                }
059                parentTranscriptSequence = parentSequence;
060                this.setParentSequence(parentTranscriptSequence);
061                setBioBegin(bioBegin);
062                setBioEnd(bioEnd);
063                this.phase = phase;
064                this.setCompoundSet(DNACompoundSet.getDNACompoundSet());
065
066        }
067
068                @Override
069        public int getLength() {
070                return Math.abs(this.getBioEnd() - this.getBioBegin()) + 1;
071        }
072
073        /**
074         *
075         * @return get the phase
076         */
077        public Integer getPhase() {
078                return phase;
079        }
080
081        /**
082         *
083         * @return get the strand
084         */
085        public Strand getStrand() {
086                return parentTranscriptSequence.getStrand();
087        }
088
089        /**
090         * A CDS sequence if negative stranded needs to be reverse complement
091         * to represent the actual coding sequence. When getting a ProteinSequence
092         * from a TranscriptSequence this method is callled for each CDSSequence
093         * <a href="http://www.sequenceontology.org/gff3.shtml">http://www.sequenceontology.org/gff3.shtml</a>
094         * <a href="http://biowiki.org/~yam/bioe131/GFF.ppt">http://biowiki.org/~yam/bioe131/GFF.ppt</a>
095         * @return coding sequence
096         */
097        public String getCodingSequence() {
098                String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand());
099
100                if (getStrand() == Strand.NEGATIVE) {
101                        //need to take complement of sequence because it is negative and we are returning a coding sequence
102                        StringBuilder b = new StringBuilder(getLength());
103                        CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet();
104                        for (int i = 0; i < sequence.length(); i++) {
105                                String nucleotide = String.valueOf(sequence.charAt(i));
106                                NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide);
107                                b.append(nucleotideCompound.getComplement().getShortName());
108                        }
109                        sequence = b.toString();
110                }
111                //  sequence = sequence.substring(phase);
112                return sequence;
113        }
114}