001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022 023package org.biojava.nbio.core.sequence.io; 024 025import org.biojava.nbio.core.sequence.*; 026import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 027import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import java.io.OutputStream; 032import java.util.ArrayList; 033import java.util.Collection; 034 035/** 036 * A Gene sequence has a Positive or Negative Strand where we want to write out to a stream the 5 to 3 prime version. 037 * It is also an option to write out the gene sequence where the exon regions are upper case 038 * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence which is negative Strand with the proper sequence 039 * @author Scooter Willis <willishf at gmail dot com> 040 */ 041public class FastaGeneWriter { 042 043 private final static Logger logger = LoggerFactory.getLogger(FastaGeneWriter.class); 044 045 boolean showExonUppercase = false; 046 OutputStream os; 047 Collection<GeneSequence> sequences; 048 FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat; 049 private int lineLength = 60; 050/** 051 * 052 * @param os 053 * @param sequences 054 * @param headerFormat 055 * @param showExonUppercase 056 */ 057 public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase) { 058 this(os, sequences, headerFormat, showExonUppercase, 60); 059 } 060/** 061 * 062 * @param os 063 * @param sequences 064 * @param headerFormat 065 * @param showExonUppercase 066 * @param lineLength 067 */ 068 public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase, int lineLength) { 069 this.os = os; 070 this.sequences = sequences; 071 this.headerFormat = headerFormat; 072 this.lineLength = lineLength; 073 this.showExonUppercase = showExonUppercase; 074 } 075/** 076 * 077 * @throws Exception 078 */ 079 public void process() throws Exception { 080 byte[] lineSep = System.getProperty("line.separator").getBytes(); 081 082 for (GeneSequence sequence : sequences) { 083 String header = headerFormat.getHeader(sequence); 084 os.write('>'); 085 os.write(header.getBytes()); 086 os.write(lineSep); 087 088 int compoundCount = 0; 089 String seq = ""; 090 //GeneSequence currently has a strand attribute to indicate direction 091 092 seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString(); 093 if (showExonUppercase) { 094 StringBuilder sb = new StringBuilder(seq.toLowerCase()); 095 int geneBioBegin = sequence.getBioBegin(); 096 int geneBioEnd = sequence.getBioEnd(); 097 for (ExonSequence exonSequence : sequence.getExonSequences()) { 098 int featureBioBegin = 0; 099 int featureBioEnd = 0; 100 if (sequence.getStrand() != Strand.NEGATIVE) { 101 featureBioBegin = exonSequence.getBioBegin() - geneBioBegin; 102 featureBioEnd = exonSequence.getBioEnd() - geneBioBegin; 103 } else { 104 featureBioBegin = geneBioEnd - exonSequence.getBioEnd(); 105 featureBioEnd = geneBioEnd - exonSequence.getBioBegin(); 106 } 107 if (featureBioBegin < 0 || featureBioEnd < 0 || featureBioEnd > sb.length() || featureBioBegin > sb.length()) { 108 logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd()); 109 } else { 110 for (int i = featureBioBegin; i <= featureBioEnd; i++) { 111 char ch = sb.charAt(i); 112 //probably not the fastest but the safest way if language is not standard ASCII 113 String temp = String.valueOf(ch); 114 ch = temp.toUpperCase().charAt(0); 115 sb.setCharAt(i, ch); 116 } 117 } 118 } 119 seq = sb.toString(); 120 } 121 122 for (int i = 0; i < seq.length(); i++) { 123 os.write(seq.charAt(i)); 124 compoundCount++; 125 if (compoundCount == lineLength) { 126 os.write(lineSep); 127 compoundCount = 0; 128 } 129 130 } 131 132 133 //If we had sequence which was a reciprocal of line length 134 //then don't write the line terminator as this has already written 135 //it 136 if ((sequence.getLength() % getLineLength()) != 0) { 137 os.write(lineSep); 138 } 139 } 140 } 141 142 /** 143 * @return the lineLength 144 */ 145 public int getLineLength() { 146 return lineLength; 147 } 148 149 /** 150 * @param lineLength the lineLength to set 151 */ 152 public void setLineLength(int lineLength) { 153 this.lineLength = lineLength; 154 } 155 156 public static void main(String[] args) { 157 158 try { 159 ArrayList<GeneSequence> sequences = new ArrayList<GeneSequence>(); 160 ChromosomeSequence seq1 = new ChromosomeSequence("ATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGCATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGC"); 161 GeneSequence gene1 = seq1.addGene(new AccessionID("gene1"), 1, 20, Strand.POSITIVE); 162 163 gene1.addExon(new AccessionID("t1_1_10"), 1, 10); 164 gene1.addExon(new AccessionID("t1_12_15"), 12, 15); 165 GeneSequence gene2 = seq1.addGene(new AccessionID("gene2"), 1, 20, Strand.NEGATIVE); 166 167 gene2.addExon(new AccessionID("t2_1_10"), 1, 10); 168 gene2.addExon(new AccessionID("t2_12_15"), 12, 15); 169 sequences.add(gene1); 170 sequences.add(gene2); 171 172 173 FastaGeneWriter fastaWriter = new FastaGeneWriter(System.out, sequences, new GenericFastaHeaderFormat<GeneSequence, NucleotideCompound>(), true); 174 fastaWriter.process(); 175 176 177 } catch (Exception e) { 178 logger.warn("Exception: ", e); 179 } 180 } 181}