001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.io;
023
024import org.biojava.nbio.core.sequence.ProteinSequence;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
026import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
027import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface;
028import org.biojava.nbio.core.sequence.template.Compound;
029import org.biojava.nbio.core.sequence.template.Sequence;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033import java.io.BufferedOutputStream;
034import java.io.FileInputStream;
035import java.io.FileOutputStream;
036import java.io.IOException;
037import java.io.OutputStream;
038import java.util.Collection;
039import java.util.LinkedHashMap;
040
041/**
042 * The FastaWriter writes a collection of sequences to an outputStream. FastaWriterHelper should be
043 * used to write out sequences. Each sequence loaded from a fasta file retains the original Fasta header
044 * and that is used when writing to the stream. This behavior can be overwritten by implementing
045 * a custom FastaHeaderFormatInterface.
046 *
047 * @author Scooter Willis <willishf at gmail dot com>
048 */
049public class FastaWriter<S extends Sequence<?>, C extends Compound> {
050
051        private final static Logger logger = LoggerFactory.getLogger(FastaWriter.class);
052
053        OutputStream os;
054        Collection<S> sequences;
055        FastaHeaderFormatInterface<S, C> headerFormat;
056        private int lineLength = 60;
057        byte[] lineSep = System.getProperty("line.separator").getBytes();
058/**
059 * Use default line length of 60
060 * @param os
061 * @param sequences
062 * @param headerFormat
063 */
064        public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat) {
065
066                this.os = os;
067                this.sequences = sequences;
068                this.headerFormat = headerFormat;
069        }
070
071/**
072 * Set custom lineLength
073 * @param os
074 * @param sequences
075 * @param headerFormat
076 * @param lineLength
077 */
078
079        public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat, int lineLength) {
080                this.os = os;
081                this.sequences = sequences;
082                this.headerFormat = headerFormat;
083                this.lineLength = lineLength;
084        }
085
086        /**
087         * Allow an override of operating system line separator for programs that needs a specific CRLF or CR or LF option
088         * @param lineSeparator
089         */
090        public void setLineSeparator(String lineSeparator){
091                lineSep = lineSeparator.getBytes();
092        }
093
094        public void process() throws IOException {
095           // boolean closeit = false;
096
097
098
099                for (S sequence : sequences) {
100                        String header = headerFormat.getHeader(sequence);
101                        os.write('>');
102                        os.write(header.getBytes());
103                        os.write(lineSep);
104
105                        int compoundCount = 0;
106                        String seq = "";
107
108                        seq = sequence.getSequenceAsString();
109
110                        for (int i = 0; i < seq.length(); i++) {
111                                os.write(seq.charAt(i));
112                                compoundCount++;
113                                if (compoundCount == lineLength) {
114                                        os.write(lineSep);
115                                        compoundCount = 0;
116                                }
117
118                        }
119
120
121                        //If we had sequence which was a reciprocal of line length
122                        //then don't write the line terminator as this has already written
123                        //it
124                        if ((sequence.getLength() % getLineLength()) != 0) {
125                                os.write(lineSep);
126                        }
127                }
128
129        }
130
131        /**
132         * @return the lineLength
133         */
134        public int getLineLength() {
135                return lineLength;
136        }
137
138        /**
139         * @param lineLength the lineLength to set
140         */
141        public void setLineLength(int lineLength) {
142                this.lineLength = lineLength;
143        }
144}