001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022
023package org.biojava.nbio.core.sequence.io;
024
025import org.biojava.nbio.core.sequence.*;
026import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
027import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.io.OutputStream;
032import java.util.ArrayList;
033import java.util.Collection;
034
035/**
036 * A Gene sequence has a Positive or Negative Strand where we want to write out to a stream the 5 to 3 prime version.
037 * It is also an option to write out the gene sequence where the exon regions are upper case
038 * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence which is negative Strand with the proper sequence
039 * @author Scooter Willis <willishf at gmail dot com>
040 */
041public class FastaGeneWriter {
042
043        private final static Logger logger = LoggerFactory.getLogger(FastaGeneWriter.class);
044
045        boolean showExonUppercase = false;
046        OutputStream os;
047        Collection<GeneSequence> sequences;
048        FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat;
049        private int lineLength = 60;
050/**
051 *
052 * @param os
053 * @param sequences
054 * @param headerFormat
055 * @param showExonUppercase
056 */
057        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase) {
058                this(os, sequences, headerFormat, showExonUppercase, 60);
059        }
060/**
061 *
062 * @param os
063 * @param sequences
064 * @param headerFormat
065 * @param showExonUppercase
066 * @param lineLength
067 */
068        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase, int lineLength) {
069                this.os = os;
070                this.sequences = sequences;
071                this.headerFormat = headerFormat;
072                this.lineLength = lineLength;
073                this.showExonUppercase = showExonUppercase;
074        }
075/**
076 *
077 * @throws Exception
078 */
079        public void process() throws Exception {
080                byte[] lineSep = System.getProperty("line.separator").getBytes();
081
082                for (GeneSequence sequence : sequences) {
083                        String header = headerFormat.getHeader(sequence);
084                        os.write('>');
085                        os.write(header.getBytes());
086                        os.write(lineSep);
087
088                        int compoundCount = 0;
089                        String seq = "";
090                        //GeneSequence currently has a strand attribute to indicate direction
091
092                        seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString();
093                        if (showExonUppercase) {
094                                StringBuilder sb = new StringBuilder(seq.toLowerCase());
095                                int geneBioBegin = sequence.getBioBegin();
096                                int geneBioEnd = sequence.getBioEnd();
097                                for (ExonSequence exonSequence : sequence.getExonSequences()) {
098                                        int featureBioBegin = 0;
099                                        int featureBioEnd = 0;
100                                        if (sequence.getStrand() != Strand.NEGATIVE) {
101                                                featureBioBegin = exonSequence.getBioBegin() - geneBioBegin;
102                                                featureBioEnd = exonSequence.getBioEnd() - geneBioBegin;
103                                        } else {
104                                                featureBioBegin = geneBioEnd - exonSequence.getBioEnd();
105                                                featureBioEnd = geneBioEnd - exonSequence.getBioBegin();
106                                        }
107                                        if (featureBioBegin < 0 || featureBioEnd < 0 || featureBioEnd > sb.length() || featureBioBegin > sb.length()) {
108                                                logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd());
109                                        } else {
110                                                for (int i = featureBioBegin; i <= featureBioEnd; i++) {
111                                                        char ch = sb.charAt(i);
112                                                        //probably not the fastest but the safest way if language is not standard ASCII
113                                                        String temp = ch + "";
114                                                        ch = temp.toUpperCase().charAt(0);
115                                                        sb.setCharAt(i, ch);
116                                                }
117                                        }
118                                }
119                                seq = sb.toString();
120                        }
121
122                        for (int i = 0; i < seq.length(); i++) {
123                                os.write(seq.charAt(i));
124                                compoundCount++;
125                                if (compoundCount == lineLength) {
126                                        os.write(lineSep);
127                                        compoundCount = 0;
128                                }
129
130                        }
131
132
133                        //If we had sequence which was a reciprocal of line length
134                        //then don't write the line terminator as this has already written
135                        //it
136                        if ((sequence.getLength() % getLineLength()) != 0) {
137                                os.write(lineSep);
138                        }
139                }
140        }
141
142        /**
143         * @return the lineLength
144         */
145        public int getLineLength() {
146                return lineLength;
147        }
148
149        /**
150         * @param lineLength the lineLength to set
151         */
152        public void setLineLength(int lineLength) {
153                this.lineLength = lineLength;
154        }
155
156        public static void main(String[] args) {
157
158                try {
159                        ArrayList<GeneSequence> sequences = new ArrayList<GeneSequence>();
160                        ChromosomeSequence seq1 = new ChromosomeSequence("ATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGCATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGC");
161                        GeneSequence gene1 = seq1.addGene(new AccessionID("gene1"), 1, 20, Strand.POSITIVE);
162
163                        gene1.addExon(new AccessionID("t1_1_10"), 1, 10);
164                        gene1.addExon(new AccessionID("t1_12_15"), 12, 15);
165                        GeneSequence gene2 = seq1.addGene(new AccessionID("gene2"), 1, 20, Strand.NEGATIVE);
166
167                        gene2.addExon(new AccessionID("t2_1_10"), 1, 10);
168                        gene2.addExon(new AccessionID("t2_12_15"), 12, 15);
169                        sequences.add(gene1);
170                        sequences.add(gene2);
171
172
173                        FastaGeneWriter fastaWriter = new FastaGeneWriter(System.out, sequences, new GenericFastaHeaderFormat<GeneSequence, NucleotideCompound>(), true);
174                        fastaWriter.process();
175
176
177                } catch (Exception e) {
178                        logger.warn("Exception: ", e);
179                }
180        }
181}