001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on DATE 021 * 022 */ 023package org.biojava.nbio.core.sequence; 024 025 026import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 027import org.biojava.nbio.core.sequence.compound.DNACompoundSet; 028import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 029import org.biojava.nbio.core.sequence.template.CompoundSet; 030 031/** 032 * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence} 033 * where a TranscriptSequence is the child of a GeneSequence 034 * Not important for protein construction but the phase is used if outputting the gene 035 * to a gff3 file. <a href="http://www.sequenceontology.org/gff3.shtml">http://www.sequenceontology.org/gff3.shtml</a> 036 * @author Scooter Willis 037 */ 038public class CDSSequence extends DNASequence { 039 040 //private static final Logger log = Logger.getLogger(CDSSequence.class.getName()); 041 Integer phase = 0; // 0, 1, 2 042 TranscriptSequence parentTranscriptSequence; 043 044 /** 045 * 046 * @param parentSequence 047 * @param bioBegin 048 * @param bioEnd 049 * @param phase 050 * @throws IllegalArgumentException if parentSequence is incompatible with DNACompoundSet 051 */ 052 public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) { 053 setCompoundSet(DNACompoundSet.getDNACompoundSet()); 054 try { 055 initSequenceStorage(parentSequence.getSequenceAsString()); 056 } catch (CompoundNotFoundException e) { 057 throw new IllegalArgumentException(e); 058 } 059 parentTranscriptSequence = parentSequence; 060 this.setParentSequence(parentTranscriptSequence); 061 setBioBegin(bioBegin); 062 setBioEnd(bioEnd); 063 this.phase = phase; 064 this.setCompoundSet(DNACompoundSet.getDNACompoundSet()); 065 066 } 067 068 @Override 069 public int getLength() { 070 return Math.abs(this.getBioEnd() - this.getBioBegin()) + 1; 071 } 072 073 /** 074 * 075 * @return get the phase 076 */ 077 public Integer getPhase() { 078 return phase; 079 } 080 081 /** 082 * 083 * @return get the strand 084 */ 085 public Strand getStrand() { 086 return parentTranscriptSequence.getStrand(); 087 } 088 089 /** 090 * A CDS sequence if negative stranded needs to be reverse complement 091 * to represent the actual coding sequence. When getting a ProteinSequence 092 * from a TranscriptSequence this method is callled for each CDSSequence 093 * <a href="http://www.sequenceontology.org/gff3.shtml">http://www.sequenceontology.org/gff3.shtml</a> 094 * <a href="http://biowiki.org/~yam/bioe131/GFF.ppt">http://biowiki.org/~yam/bioe131/GFF.ppt</a> 095 * @return coding sequence 096 */ 097 public String getCodingSequence() { 098 String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand()); 099 100 if (getStrand() == Strand.NEGATIVE) { 101 //need to take complement of sequence because it is negative and we are returning a coding sequence 102 StringBuilder b = new StringBuilder(getLength()); 103 CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); 104 for (int i = 0; i < sequence.length(); i++) { 105 String nucleotide = String.valueOf(sequence.charAt(i)); 106 NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); 107 b.append(nucleotideCompound.getComplement().getShortName()); 108 } 109 sequence = b.toString(); 110 } 111 // sequence = sequence.substring(phase); 112 return sequence; 113 } 114}