/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.core.sequence.io;

import org.biojava.nbio.core.sequence.io.template.GenbankHeaderFormatInterface;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.core.util.StringManipulationHelper;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.Locale;


/**
 * Writes a collection of sequences to an {@link OutputStream}: for each
 * sequence the header produced by the supplied
 * {@link GenbankHeaderFormatInterface}, an {@code ORIGIN} line, the
 * lower-cased sequence text broken into numbered lines of blank-separated
 * chunks, and a terminating {@code //} line.
 *
 * @author mckeee1
 *
 */
public class GenbankWriter<S extends Sequence<?>, C extends Compound> {
	/** Line length used when none is given to the constructor. */
	private static final int DEFAULT_LINE_LENGTH = 60;

	/** Number of residues written between two separating blanks. */
	private static final int CHUNK_SIZE = 10;

	/**
	 * Width handed to {@link StringManipulationHelper#padLeft} for the
	 * position number that starts every sequence line.
	 */
	int SEQUENCE_INDENT = 9;

	OutputStream os;
	Collection<S> sequences;
	GenbankHeaderFormatInterface<S, C> headerFormat;
	private int lineLength = DEFAULT_LINE_LENGTH;

	/**
	 * Use default line length of 60
	 *
	 * @param os stream the records are written to
	 * @param sequences sequences to write
	 * @param headerFormat produces the header text for each sequence
	 */
	public GenbankWriter(OutputStream os, Collection<S> sequences,
			GenbankHeaderFormatInterface<S, C> headerFormat) {
		this(os, sequences, headerFormat, DEFAULT_LINE_LENGTH);
	}

	/**
	 * Set custom lineLength
	 *
	 * @param os stream the records are written to
	 * @param sequences sequences to write
	 * @param headerFormat produces the header text for each sequence
	 * @param lineLength number of residues per sequence line; must be
	 *            positive by the time {@link #process()} is called
	 */
	public GenbankWriter(OutputStream os, Collection<S> sequences,
			GenbankHeaderFormatInterface<S, C> headerFormat, int lineLength) {
		this.os = os;
		this.sequences = sequences;
		this.headerFormat = headerFormat;
		this.lineLength = lineLength;
	}

	/**
	 * Writes every sequence to the output stream and flushes it. The stream
	 * is not closed.
	 *
	 * @throws IllegalStateException if the configured line length is not
	 *             positive (checked before anything is written)
	 * @throws IOException if the underlying stream reported a write error
	 * @throws Exception declared for backward compatibility with existing
	 *             callers
	 */
	public void process() throws Exception {
		// Loosely based on code from Howard Salis
		if (lineLength <= 0) {
			// A non-positive step would keep the line loop from ever finishing.
			throw new IllegalStateException(
					"lineLength must be positive but was " + lineLength);
		}

		PrintWriter writer = new PrintWriter(os);
		for (S sequence : sequences) {
			// The header is finished text, not a format string: print it
			// verbatim so a literal '%' cannot be misread as a conversion.
			writer.print(headerFormat.getHeader(sequence));
			writer.println();

			// Locale.ROOT keeps the result independent of the default
			// locale (e.g. 'I' must become 'i' under a Turkish locale too).
			String data = sequence.getSequenceAsString().toLowerCase(Locale.ROOT);
			writer.println("ORIGIN");
			writeSequenceLines(writer, data);
			writer.println("//");
		}

		writer.flush();
		// PrintWriter never throws on I/O failure; surface it explicitly.
		if (writer.checkError()) {
			throw new IOException("Error while writing Genbank output");
		}
	}

	/**
	 * Writes the sequence text as lines of at most {@code lineLength}
	 * residues, each starting with the padded 1-based position of its first
	 * residue and split into blank-separated chunks.
	 */
	private void writeSequenceLines(PrintWriter writer, String data) {
		int seqLen = data.length();
		for (int lineStart = 0; lineStart < seqLen; lineStart += lineLength) {
			writer.print(StringManipulationHelper.padLeft(
					Integer.toString(lineStart + 1), SEQUENCE_INDENT));

			int lineEnd = Math.min(lineStart + lineLength, seqLen);
			for (int chunkStart = lineStart; chunkStart < lineEnd; chunkStart += CHUNK_SIZE) {
				// Clamp to the end of the line (not just the end of the
				// sequence) so a line length that is not a multiple of the
				// chunk size does not repeat residues on the next line.
				int chunkEnd = Math.min(chunkStart + CHUNK_SIZE, lineEnd);
				writer.print(" " + data.substring(chunkStart, chunkEnd));
			}
			writer.println();
		}
	}

	/**
	 * @return the lineLength
	 */
	public int getLineLength() {
		return lineLength;
	}

	/**
	 * @param lineLength
	 *            the lineLength to set
	 */
	public void setLineLength(int lineLength) {
		this.lineLength = lineLength;
	}

}