001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023package org.biojava.nbio.core.sequence;
024
025
026import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
027import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
028import org.biojava.nbio.core.sequence.template.CompoundSet;
029
030/**
031 * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence}
032 * where a TranscriptSequence is the child of a GeneSequence
033 * Not important for protein construction but the phase is used if outputting the gene
034 * to a gff3 file. {@link http://www.sequenceontology.org/gff3.shtml}
035 * @author Scooter Willis
036 */
037public class CDSSequence extends DNASequence {
038
039        //private static final Logger log = Logger.getLogger(CDSSequence.class.getName());
040        Integer phase = 0; // 0, 1, 2
041        TranscriptSequence parentTranscriptSequence;
042
043        /**
044         *
045         * @param parentSequence
046         * @param bioBegin
047         * @param bioEnd
048         * @param phase
049         */
050        public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) {
051                parentTranscriptSequence = parentSequence;
052                this.setParentSequence(parentTranscriptSequence);
053                setBioBegin(bioBegin);
054                setBioEnd(bioEnd);
055                this.phase = phase;
056                this.setCompoundSet(DNACompoundSet.getDNACompoundSet());
057
058        }
059
060                @Override
061        public int getLength() {
062                return Math.abs(this.getBioEnd() - this.getBioBegin()) + 1;
063        }
064
065        /**
066         *
067         * @return get the phase
068         */
069        public Integer getPhase() {
070                return phase;
071        }
072
073        /**
074         *
075         * @return get the strand
076         */
077        public Strand getStrand() {
078                return parentTranscriptSequence.getStrand();
079        }
080
081        /**
082         * A CDS sequence if negative stranded needs to be reverse complement
083         * to represent the actual coding sequence. When getting a ProteinSequence
084         * from a TranscriptSequence this method is callled for each CDSSequence
085         * {@link http://www.sequenceontology.org/gff3.shtml}
086         * {@link http://biowiki.org/~yam/bioe131/GFF.ppt}
087         * @return coding sequence
088         */
089        public String getCodingSequence() {
090                String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand());
091
092                if (getStrand() == Strand.NEGATIVE) {
093                        //need to take complement of sequence because it is negative and we are returning a coding sequence
094                        StringBuilder b = new StringBuilder(getLength());
095                        CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet();
096                        for (int i = 0; i < sequence.length(); i++) {
097                                String nucleotide = String.valueOf(sequence.charAt(i));
098                                NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide);
099                                b.append(nucleotideCompound.getComplement().getShortName());
100                        }
101                        sequence = b.toString();
102                }
103                //  sequence = sequence.substring(phase);
104                return sequence;
105        }
106}