001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022
023package org.biojava.nbio.core.sequence.io;
024
import org.biojava.nbio.core.sequence.*;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Locale;
033
034/**
035 * A Gene sequence has a Positive or Negative Strand where we want to write out to a stream the 5 to 3 prime version.
036 * It is also an option to write out the gene sequence where the exon regions are upper case
037 * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence which is negative Strand with the proper sequence
038 * @author Scooter Willis <willishf at gmail dot com>
039 */
040public class FastaGeneWriter {
041
042        private final static Logger logger = LoggerFactory.getLogger(FastaGeneWriter.class);
043
044        boolean showExonUppercase = false;
045        OutputStream os;
046        Collection<GeneSequence> sequences;
047        FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat;
048        private int lineLength = 60;
049/**
050 *
051 * @param os
052 * @param sequences
053 * @param headerFormat
054 * @param showExonUppercase
055 */
056        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase) {
057                this(os, sequences, headerFormat, showExonUppercase, 60);
058        }
059/**
060 *
061 * @param os
062 * @param sequences
063 * @param headerFormat
064 * @param showExonUppercase
065 * @param lineLength
066 */
067        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase, int lineLength) {
068                this.os = os;
069                this.sequences = sequences;
070                this.headerFormat = headerFormat;
071                this.lineLength = lineLength;
072                this.showExonUppercase = showExonUppercase;
073        }
074/**
075 *
076 * @throws Exception
077 */
078        public void process() throws Exception {
079                byte[] lineSep = System.getProperty("line.separator").getBytes();
080
081                for (GeneSequence sequence : sequences) {
082                        String header = headerFormat.getHeader(sequence);
083                        os.write('>');
084                        os.write(header.getBytes());
085                        os.write(lineSep);
086
087                        int compoundCount = 0;
088                        String seq = "";
089                        //GeneSequence currently has a strand attribute to indicate direction
090
091                        seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString();
092                        if (showExonUppercase) {
093                                StringBuilder sb = new StringBuilder(seq.toLowerCase());
094                                int geneBioBegin = sequence.getBioBegin();
095                                int geneBioEnd = sequence.getBioEnd();
096                                for (ExonSequence exonSequence : sequence.getExonSequences()) {
097                                        int featureBioBegin = 0;
098                                        int featureBioEnd = 0;
099                                        if (sequence.getStrand() != Strand.NEGATIVE) {
100                                                featureBioBegin = exonSequence.getBioBegin() - geneBioBegin;
101                                                featureBioEnd = exonSequence.getBioEnd() - geneBioBegin;
102                                        } else {
103                                                featureBioBegin = geneBioEnd - exonSequence.getBioEnd();
104                                                featureBioEnd = geneBioEnd - exonSequence.getBioBegin();
105                                        }
106                                        if (featureBioBegin < 0 || featureBioEnd < 0 || featureBioEnd > sb.length() || featureBioBegin > sb.length()) {
107                                                logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd());
108                                        } else {
109                                                for (int i = featureBioBegin; i <= featureBioEnd; i++) {
110                                                        char ch = sb.charAt(i);
111                                                        //probably not the fastest but the safest way if language is not standard ASCII
112                                                        String temp = String.valueOf(ch);
113                                                        ch = temp.toUpperCase().charAt(0);
114                                                        sb.setCharAt(i, ch);
115                                                }
116                                        }
117                                }
118                                seq = sb.toString();
119                        }
120
121                        for (int i = 0; i < seq.length(); i++) {
122                                os.write(seq.charAt(i));
123                                compoundCount++;
124                                if (compoundCount == lineLength) {
125                                        os.write(lineSep);
126                                        compoundCount = 0;
127                                }
128
129                        }
130
131
132                        //If we had sequence which was a reciprocal of line length
133                        //then don't write the line terminator as this has already written
134                        //it
135                        if ((sequence.getLength() % getLineLength()) != 0) {
136                                os.write(lineSep);
137                        }
138                }
139        }
140
141        /**
142         * @return the lineLength
143         */
144        public int getLineLength() {
145                return lineLength;
146        }
147
148        /**
149         * @param lineLength the lineLength to set
150         */
151        public void setLineLength(int lineLength) {
152                this.lineLength = lineLength;
153        }
154
155        
156}