001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022package org.biojava.nbio.core.sequence.io; 023 024import org.biojava.nbio.core.sequence.ProteinSequence; 025import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 026import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 027import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface; 028import org.biojava.nbio.core.sequence.template.Compound; 029import org.biojava.nbio.core.sequence.template.Sequence; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033import java.io.BufferedOutputStream; 034import java.io.FileInputStream; 035import java.io.FileOutputStream; 036import java.io.IOException; 037import java.io.OutputStream; 038import java.util.Collection; 039import java.util.LinkedHashMap; 040 041/** 042 * The FastaWriter writes a collection of sequences to an outputStream. FastaWriterHelper should be 043 * used to write out sequences. Each sequence loaded from a fasta file retains the original Fasta header 044 * and that is used when writing to the stream. This behavior can be overwritten by implementing 045 * a custom FastaHeaderFormatInterface. 046 * 047 * @author Scooter Willis <willishf at gmail dot com> 048 */ 049public class FastaWriter<S extends Sequence<?>, C extends Compound> { 050 051 private final static Logger logger = LoggerFactory.getLogger(FastaWriter.class); 052 053 OutputStream os; 054 Collection<S> sequences; 055 FastaHeaderFormatInterface<S, C> headerFormat; 056 private int lineLength = 60; 057 byte[] lineSep = System.getProperty("line.separator").getBytes(); 058/** 059 * Use default line length of 60 060 * @param os 061 * @param sequences 062 * @param headerFormat 063 */ 064 public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat) { 065 066 this.os = os; 067 this.sequences = sequences; 068 this.headerFormat = headerFormat; 069 } 070 071/** 072 * Set custom lineLength 073 * @param os 074 * @param sequences 075 * @param headerFormat 076 * @param lineLength 077 */ 078 079 public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat, int lineLength) { 080 this.os = os; 081 this.sequences = sequences; 082 this.headerFormat = headerFormat; 083 this.lineLength = lineLength; 084 } 085 086 /** 087 * Allow an override of operating system line separator for programs that needs a specific CRLF or CR or LF option 088 * @param lineSeparator 089 */ 090 public void setLineSeparator(String lineSeparator){ 091 lineSep = lineSeparator.getBytes(); 092 } 093 094 public void process() throws IOException { 095 // boolean closeit = false; 096 097 098 099 for (S sequence : sequences) { 100 String header = headerFormat.getHeader(sequence); 101 os.write('>'); 102 os.write(header.getBytes()); 103 os.write(lineSep); 104 105 int compoundCount = 0; 106 String seq = ""; 107 108 seq = sequence.getSequenceAsString(); 109 110 for (int i = 0; i < seq.length(); i++) { 111 os.write(seq.charAt(i)); 112 compoundCount++; 113 if (compoundCount == lineLength) { 114 os.write(lineSep); 115 compoundCount = 0; 116 } 117 118 } 119 120 121 //If we had sequence which was a reciprocal of line length 122 //then don't write the line terminator as this has already written 123 //it 124 if ((sequence.getLength() % getLineLength()) != 0) { 125 os.write(lineSep); 126 } 127 } 128 129 } 130 131 public static void main(String[] args) { 132 try { 133 FileInputStream is = new FileInputStream("/Users/Scooter/scripps/dyadic/c1-454Scaffolds.faa"); 134 135 136 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(is, new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); 137 LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process(); 138 is.close(); 139 140 141 // logger.debug(proteinSequences); 142 143 FileOutputStream fileOutputStream = new FileOutputStream("/Users/Scooter/scripps/dyadic/c1-454Scaffolds_temp.faa"); 144 145 BufferedOutputStream bo = new BufferedOutputStream(fileOutputStream); 146 long start = System.currentTimeMillis(); 147 FastaWriter<ProteinSequence, AminoAcidCompound> fastaWriter = new FastaWriter<ProteinSequence, AminoAcidCompound>(bo, proteinSequences.values(), new GenericFastaHeaderFormat<ProteinSequence, AminoAcidCompound>()); 148 fastaWriter.process(); 149 bo.close(); 150 long end = System.currentTimeMillis(); 151 logger.info("Took {} seconds", (end - start)); 152 153 fileOutputStream.close(); 154 155 156 } catch (IOException e) { 157 logger.warn("Exception: ", e); 158 } 159 } 160 161 /** 162 * @return the lineLength 163 */ 164 public int getLineLength() { 165 return lineLength; 166 } 167 168 /** 169 * @param lineLength the lineLength to set 170 */ 171 public void setLineLength(int lineLength) { 172 this.lineLength = lineLength; 173 } 174}