001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021/**
022 *
023 */
024package org.biojava.nbio.core.sequence.io;
025
import org.biojava.nbio.core.sequence.io.template.GenbankHeaderFormatInterface;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.core.util.StringManipulationHelper;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.Locale;
034
035
036/**
037 * @author mckeee1
038 *
039 */
040public class GenbankWriter<S extends Sequence<?>, C extends Compound> {
041        int SEQUENCE_INDENT = 9;
042
043        OutputStream os;
044        Collection<S> sequences;
045        GenbankHeaderFormatInterface<S, C> headerFormat;
046        private int lineLength = 60;
047
048        // byte[] lineSep = System.getProperty("line.separator").getBytes();
049        /**
050         * Use default line length of 60
051         *
052         * @param os
053         * @param sequences
054         * @param headerFormat
055         */
056        public GenbankWriter(OutputStream os, Collection<S> sequences,
057                        GenbankHeaderFormatInterface<S, C> headerFormat) {
058
059                this.os = os;
060                this.sequences = sequences;
061                this.headerFormat = headerFormat;
062        }
063
064        /**
065         * Set custom lineLength
066         *
067         * @param os
068         * @param sequences
069         * @param headerFormat
070         * @param lineLength
071         */
072
073        public GenbankWriter(OutputStream os, Collection<S> sequences,
074                        GenbankHeaderFormatInterface<S, C> headerFormat, int lineLength) {
075                this.os = os;
076                this.sequences = sequences;
077                this.headerFormat = headerFormat;
078                this.lineLength = lineLength;
079        }
080
081        /**
082         * Allow an override of operating system line separator for programs that
083         * needs a specific CRLF or CR or LF option
084         *
085         * @param lineSeparator
086         */
087
088        public void process() throws Exception {
089                // Loosely based on code from Howard Salis
090                // TODO - Force lower case?
091                // boolean closeit = false;
092                PrintWriter writer = new PrintWriter(os);
093                for (S sequence : sequences) {
094                        String header = headerFormat.getHeader(sequence);
095                        writer.format(header);
096                        writer.println();
097                        // os.write(lineSep);
098
099                        /*
100                         * if isinstance(record.seq, UnknownSeq): #We have already recorded
101                         * the length, and there is no need #to record a long sequence of
102                         * NNNNNNN...NNN or whatever. if "contig" in record.annotations:
103                         * self._write_contig(record) else: self.handle.write("ORIGIN\n")
104                         * return
105                         */
106
107                        String data = sequence.getSequenceAsString().toLowerCase();
108                        int seq_len = data.length();
109                        writer.println("ORIGIN");
110                        // os.write(lineSep);
111
112                        for (int line_number = 0; line_number < seq_len; line_number += lineLength) {
113                                writer.print(StringManipulationHelper.padLeft(
114                                                Integer.toString(line_number + 1), SEQUENCE_INDENT));
115                                for (int words = line_number; words < Math.min(line_number
116                                                + lineLength, seq_len); words += 10) {
117                                        if ((words + 10) > data.length()) {
118                                                writer.print((" " + data.substring(words)));
119                                        } else {
120                                                writer.print((" " + data.substring(words, words + 10)));
121                                        }
122                                }
123                                // os.write(lineSep);
124                                writer.println();
125                        }
126
127                        writer.println("//");
128
129                }
130
131                writer.flush();
132
133        }
134
135        /*
136         * public static void main(String[] args) { try { FileInputStream is = new
137         * FileInputStream("/Users/Scooter/scripps/dyadic/c1-454Scaffolds.faa");
138         *
139         *
140         * FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new
141         * FastaReader<ProteinSequence, AminoAcidCompound>(is, new
142         * GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), new
143         * ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
144         * LinkedHashMap<String, ProteinSequence> proteinSequences =
145         * fastaReader.process(); is.close();
146         *
147         *
148         * // System.out.println(proteinSequences);
149         *
150         * FileOutputStream fileOutputStream = new
151         * FileOutputStream("/Users/Scooter/scripps/dyadic/c1-454Scaffolds_temp.faa"
152         * );
153         *
154         * BufferedOutputStream bo = new BufferedOutputStream(fileOutputStream);
155         * long start = System.currentTimeMillis(); FastaWriter<ProteinSequence,
156         * AminoAcidCompound> fastaWriter = new FastaWriter<ProteinSequence,
157         * AminoAcidCompound>(bo, proteinSequences.values(), new
158         * GenericFastaHeaderFormat<ProteinSequence, AminoAcidCompound>());
159         * fastaWriter.process(); bo.close(); long end = System.currentTimeMillis();
160         * System.out.println("Took " + (end - start) + " seconds");
161         *
162         * fileOutputStream.close();
163         *
164         *
165         * } catch (Exception e) { e.printStackTrace(); } }
166         */
167        /**
168         * @return the lineLength
169         */
170        public int getLineLength() {
171                return lineLength;
172        }
173
174        /**
175         * @param lineLength
176         *            the lineLength to set
177         */
178        public void setLineLength(int lineLength) {
179                this.lineLength = lineLength;
180        }
181
182}