/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 */

package org.biojava.nbio.core.sequence.io;

import org.biojava.nbio.core.sequence.*;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Locale;

/**
 * Writes a collection of {@link GeneSequence}s to an output stream in FASTA format.
 * A gene sequence has a positive or negative strand, and the text written for each gene
 * is the 5 prime to 3 prime version obtained from
 * {@link GeneSequence#getSequence5PrimeTo3Prime()}.
 * It is also an option to write out the gene sequence with the exon regions in upper case
 * and everything else in lower case.
 * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence
 * which is negative Strand with the proper sequence.
 *
 * @author Scooter Willis <willishf at gmail dot com>
 */
public class FastaGeneWriter {

    private final static Logger logger = LoggerFactory.getLogger(FastaGeneWriter.class);

    /** Line width used when the caller does not supply one. */
    private static final int DEFAULT_LINE_LENGTH = 60;

    boolean showExonUppercase = false;
    OutputStream os;
    Collection<GeneSequence> sequences;
    FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat;
    private int lineLength = DEFAULT_LINE_LENGTH;

    /**
     * Creates a writer that wraps sequence lines at 60 characters.
     *
     * @param os the stream the FASTA records are written to
     * @param sequences the gene sequences to write, in iteration order
     * @param headerFormat produces the header text that follows the leading {@code >}
     * @param showExonUppercase if {@code true}, the sequence is lower-cased and only the
     *        exon regions are written in upper case
     */
    public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences,
            FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat,
            boolean showExonUppercase) {
        this(os, sequences, headerFormat, showExonUppercase, DEFAULT_LINE_LENGTH);
    }

    /**
     * Creates a writer with an explicit line width.
     *
     * @param os the stream the FASTA records are written to
     * @param sequences the gene sequences to write, in iteration order
     * @param headerFormat produces the header text that follows the leading {@code >}
     * @param showExonUppercase if {@code true}, the sequence is lower-cased and only the
     *        exon regions are written in upper case
     * @param lineLength number of sequence characters per output line; a value of zero
     *        or less means the sequence is written on a single line
     */
    public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences,
            FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat,
            boolean showExonUppercase, int lineLength) {
        this.os = os;
        this.sequences = sequences;
        this.headerFormat = headerFormat;
        this.lineLength = lineLength;
        this.showExonUppercase = showExonUppercase;
    }

    /**
     * Writes every sequence as a FASTA record: a {@code >} followed by the formatted
     * header, a line separator, and then the 5 prime to 3 prime sequence text wrapped at
     * the configured line length. The stream is neither flushed nor closed here.
     *
     * @throws Exception if the header formatter, the sequence accessors or the output
     *         stream throw; nothing is caught by this method
     */
    public void process() throws Exception {
        byte[] lineSep = System.getProperty("line.separator").getBytes();

        for (GeneSequence sequence : sequences) {
            String header = headerFormat.getHeader(sequence);
            os.write('>');
            os.write(header.getBytes());
            os.write(lineSep);

            //GeneSequence currently has a strand attribute to indicate direction
            String seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString();
            if (showExonUppercase) {
                seq = uppercaseExons(sequence, seq);
            }

            writeWrapped(seq, lineSep);
        }
    }

    /**
     * Returns {@code seq} in lower case with the characters covered by each exon of
     * {@code sequence} converted to upper case. Exons whose offsets fall outside the
     * sequence text are reported with a warning and skipped.
     */
    private String uppercaseExons(GeneSequence sequence, String seq) {
        // Locale.ROOT keeps the result independent of the default locale (for example
        // the Turkish dotless i).
        StringBuilder sb = new StringBuilder(seq.toLowerCase(Locale.ROOT));
        int geneBioBegin = sequence.getBioBegin();
        int geneBioEnd = sequence.getBioEnd();
        boolean negativeStrand = sequence.getStrand() == Strand.NEGATIVE;

        for (ExonSequence exonSequence : sequence.getExonSequences()) {
            // Zero-based, inclusive offsets of the exon within the text being written.
            int featureBioBegin;
            int featureBioEnd;
            if (negativeStrand) {
                // The text is 5' to 3', so offsets are measured back from the gene end.
                featureBioBegin = geneBioEnd - exonSequence.getBioEnd();
                featureBioEnd = geneBioEnd - exonSequence.getBioBegin();
            } else {
                featureBioBegin = exonSequence.getBioBegin() - geneBioBegin;
                featureBioEnd = exonSequence.getBioEnd() - geneBioBegin;
            }

            // Both offsets are used as inclusive indexes below, so an offset equal to
            // sb.length() is already out of range.
            boolean outOfRange = featureBioBegin < 0 || featureBioEnd < 0
                    || featureBioEnd >= sb.length() || featureBioBegin >= sb.length();
            if (outOfRange) {
                logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd());
                continue;
            }

            for (int i = featureBioBegin; i <= featureBioEnd; i++) {
                sb.setCharAt(i, Character.toUpperCase(sb.charAt(i)));
            }
        }
        return sb.toString();
    }

    /**
     * Writes {@code seq} to the stream, inserting {@code lineSep} after every
     * {@code lineLength} characters and after a final, partially filled line.
     */
    private void writeWrapped(String seq, byte[] lineSep) throws IOException {
        int compoundCount = 0;
        for (int i = 0; i < seq.length(); i++) {
            os.write(seq.charAt(i));
            compoundCount++;
            if (compoundCount == lineLength) {
                os.write(lineSep);
                compoundCount = 0;
            }
        }

        // If the sequence length was an exact multiple of the line length, the line
        // terminator has already been written by the loop, so only a partially filled
        // last line still needs one.
        if (compoundCount != 0) {
            os.write(lineSep);
        }
    }

    /**
     * @return the lineLength
     */
    public int getLineLength() {
        return lineLength;
    }

    /**
     * @param lineLength the lineLength to set; a value of zero or less means the
     *        sequence is written on a single line
     */
    public void setLineLength(int lineLength) {
        this.lineLength = lineLength;
    }

}