001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on DATE 021 * 022 */ 023package org.biojava.nbio.core.sequence; 024 025 026import org.biojava.nbio.core.sequence.compound.DNACompoundSet; 027import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 028import org.biojava.nbio.core.sequence.template.CompoundSet; 029 030/** 031 * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence} 032 * where a TranscriptSequence is the child of a GeneSequence 033 * Not important for protein construction but the phase is used if outputting the gene 034 * to a gff3 file. {@link http://www.sequenceontology.org/gff3.shtml} 035 * @author Scooter Willis 036 */ 037public class CDSSequence extends DNASequence { 038 039 //private static final Logger log = Logger.getLogger(CDSSequence.class.getName()); 040 Integer phase = 0; // 0, 1, 2 041 TranscriptSequence parentTranscriptSequence; 042 043 /** 044 * 045 * @param parentSequence 046 * @param bioBegin 047 * @param bioEnd 048 * @param phase 049 */ 050 public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) { 051 parentTranscriptSequence = parentSequence; 052 this.setParentSequence(parentTranscriptSequence); 053 setBioBegin(bioBegin); 054 setBioEnd(bioEnd); 055 this.phase = phase; 056 this.setCompoundSet(DNACompoundSet.getDNACompoundSet()); 057 058 } 059 060 @Override 061 public int getLength() { 062 return Math.abs(this.getBioEnd() - this.getBioBegin()) + 1; 063 } 064 065 /** 066 * 067 * @return get the phase 068 */ 069 public Integer getPhase() { 070 return phase; 071 } 072 073 /** 074 * 075 * @return get the strand 076 */ 077 public Strand getStrand() { 078 return parentTranscriptSequence.getStrand(); 079 } 080 081 /** 082 * A CDS sequence if negative stranded needs to be reverse complement 083 * to represent the actual coding sequence. When getting a ProteinSequence 084 * from a TranscriptSequence this method is callled for each CDSSequence 085 * {@link http://www.sequenceontology.org/gff3.shtml} 086 * {@link http://biowiki.org/~yam/bioe131/GFF.ppt} 087 * @return coding sequence 088 */ 089 public String getCodingSequence() { 090 String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand()); 091 092 if (getStrand() == Strand.NEGATIVE) { 093 //need to take complement of sequence because it is negative and we are returning a coding sequence 094 StringBuilder b = new StringBuilder(getLength()); 095 CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); 096 for (int i = 0; i < sequence.length(); i++) { 097 String nucleotide = String.valueOf(sequence.charAt(i)); 098 NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); 099 b.append(nucleotideCompound.getComplement().getShortName()); 100 } 101 sequence = b.toString(); 102 } 103 // sequence = sequence.substring(phase); 104 return sequence; 105 } 106}