/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.genome.parsers.gff;

import org.biojava.nbio.core.sequence.*;

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Writes gene, transcript (mRNA) and CDS features of chromosome sequences
 * as GFF3 text lines.
 *
 * @author Scooter Willis
 */
public class GFF3Writer {

    /**
     * Output gff3 format for a DNA Sequence.
     * <p>
     * Writes the {@code ##gff-version 3} header followed, for every map entry,
     * by one {@code gene} line per gene, one {@code mRNA} line per transcript of
     * that gene and one {@code CDS} line per CDS of that transcript. The CDS
     * entries of a transcript are sorted with {@code SequenceComparator} before
     * being written. Transcript IDs are generated as
     * {@code <gene accession ID>.<1-based transcript index>}.
     * <p>
     * All text is encoded as UTF-8. The stream is neither flushed nor closed
     * by this method.
     *
     * @param outputStream the stream the GFF3 lines are written to
     * @param chromosomeSequenceList chromosome sequences keyed by the value
     *        written to the first (seqid) column of every line
     * @throws Exception if writing to the stream or reading the sequence
     *         features fails
     */
    public void write(OutputStream outputStream, Map<String, ChromosomeSequence> chromosomeSequenceList) throws Exception {

        outputStream.write("##gff-version 3\n".getBytes(StandardCharsets.UTF_8));
        for (Map.Entry<String, ChromosomeSequence> entry : chromosomeSequenceList.entrySet()) {
            String key = entry.getKey();
            ChromosomeSequence chromosomeSequence = entry.getValue();
            // Disabled: emitting a "size" line for the chromosome itself.
            //         if(source.length() == 0){
            //             Collection<GeneSequence> genes = chromosomeSequence.getGeneSequences().values();
            //             for(GeneSequence gene : genes){
            //                 source = gene.getSource();
            //                 break;
            //             }
            //         }
            //         gff3line = key + "\t" + source + "\t" + "size" + "\t" + "1" + "\t" + chromosomeSequence.getBioEnd() + "\t.\t.\t.\tName=" + key + "\r\n";
            //         outputStream.write(gff3line.getBytes());

            for (GeneSequence geneSequence : chromosomeSequence.getGeneSequences().values()) {
                String geneId = geneSequence.getAccession().getID();

                // Columns: seqid, source, type, start, end, score, strand, phase, attributes.
                String geneLine = key + "\t" + geneSequence.getSource() + "\t" + "gene" + "\t"
                        + geneSequence.getBioBegin() + "\t" + geneSequence.getBioEnd() + "\t"
                        + formatScore(geneSequence.getSequenceScore()) + "\t"
                        + geneSequence.getStrand().getStringRepresentation() + "\t"
                        + ".\t"
                        + "ID=" + geneId + ";Name=" + geneId
                        + getGFF3Note(geneSequence.getNotesList())
                        + "\n";
                outputStream.write(geneLine.getBytes(StandardCharsets.UTF_8));

                int transcriptIndex = 0;
                for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) {
                    transcriptIndex++;

                    // The transcript ID doubles as the Parent of its CDS lines.
                    String transcriptId = geneId + "." + transcriptIndex;
                    String transcriptLine = key + "\t" + transcriptSequence.getSource() + "\t" + "mRNA" + "\t"
                            + transcriptSequence.getBioBegin() + "\t" + transcriptSequence.getBioEnd() + "\t"
                            + formatScore(transcriptSequence.getSequenceScore()) + "\t"
                            + transcriptSequence.getStrand().getStringRepresentation() + "\t"
                            + ".\t"
                            + "ID=" + transcriptId + ";Parent=" + geneId + ";Name=" + transcriptId
                            + getGFF3Note(transcriptSequence.getNotesList())
                            + "\n";
                    outputStream.write(transcriptLine.getBytes(StandardCharsets.UTF_8));

                    List<CDSSequence> cdsSequenceList = new ArrayList<>(transcriptSequence.getCDSSequences().values());
                    Collections.sort(cdsSequenceList, new SequenceComparator());
                    for (CDSSequence cdsSequence : cdsSequenceList) {
                        String cdsLine = key + "\t" + cdsSequence.getSource() + "\t" + "CDS" + "\t"
                                + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t"
                                + formatScore(cdsSequence.getSequenceScore()) + "\t"
                                + cdsSequence.getStrand().getStringRepresentation() + "\t"
                                + cdsSequence.getPhase() + "\t"
                                + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptId
                                + getGFF3Note(cdsSequence.getNotesList())
                                + "\n";
                        outputStream.write(cdsLine.getBytes(StandardCharsets.UTF_8));
                    }

                }
            }

        }

    }

    /**
     * Renders the score column.
     *
     * @param score the feature score, may be {@code null}
     * @return {@code "."} when no score is set, otherwise the score's string form
     */
    private static String formatScore(Double score) {
        return score == null ? "." : String.valueOf(score);
    }

    /**
     * Builds the {@code Note} attribute for a feature line.
     *
     * @param notesList the notes attached to a feature
     * @return an empty string when there are no notes, otherwise
     *         {@code ";Note="} followed by the notes separated by single spaces
     */
    private String getGFF3Note(List<String> notesList) {
        if (notesList == null || notesList.isEmpty()) {
            return "";
        }

        StringBuilder notes = new StringBuilder(";Note=");
        boolean first = true;
        for (String note : notesList) {
            // Separator goes between notes only: none before the first, none after the last.
            if (!first) {
                notes.append(" ");
            }
            notes.append(note);
            first = false;
        }
        return notes.toString();
    }

}