001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.parsers.gff; 022 023import org.biojava.nbio.genome.GeneFeatureHelper; 024import org.biojava.nbio.core.sequence.*; 025 026import java.io.File; 027import java.io.FileOutputStream; 028import java.io.OutputStream; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.LinkedHashMap; 032 033/** 034 * 035 * @author Scooter Willis <willishf at gmail dot com> 036 */ 037public class GFF3Writer { 038 039 /** 040 * Output gff3 format for a DNA Sequence 041 * @param fileName 042 * @param chromosomeSequence 043 * @throws Exception 044 */ 045 public void write(OutputStream outputStream, LinkedHashMap<String, ChromosomeSequence> chromosomeSequenceList) throws Exception { 046 047 outputStream.write("##gff-version 3\n".getBytes()); 048 for (String key : chromosomeSequenceList.keySet()) { 049 ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(key); 050 String gff3line = ""; 051 // if(source.length() == 0){ 052 // Collection<GeneSequence> genes = chromosomeSequence.getGeneSequences().values(); 053 // for(GeneSequence gene : genes){ 054 // source = gene.getSource(); 055 // break; 056 // } 057 // } 058 // gff3line = key + "\t" + source + "\t" + "size" + "\t" + "1" + "\t" + chromosomeSequence.getBioEnd() + "\t.\t.\t.\tName=" + key + "\r\n"; 059 // outputStream.write(gff3line.getBytes()); 060 061 for (GeneSequence geneSequence : chromosomeSequence.getGeneSequences().values()) { 062 gff3line = key + "\t" + geneSequence.getSource() + "\t" + "gene" + "\t" + geneSequence.getBioBegin() + "\t" + geneSequence.getBioEnd() + "\t"; 063 Double score = geneSequence.getSequenceScore(); 064 if (score == null) { 065 gff3line = gff3line + ".\t"; 066 } else { 067 gff3line = gff3line + score + "\t"; 068 } 069 gff3line = gff3line + geneSequence.getStrand().getStringRepresentation() + "\t"; 070 gff3line = gff3line + ".\t"; 071 gff3line = gff3line + "ID=" + geneSequence.getAccession().getID() + ";Name=" + geneSequence.getAccession().getID(); 072 gff3line = gff3line + getGFF3Note(geneSequence.getNotesList()); 073 gff3line = gff3line + "\n"; 074 outputStream.write(gff3line.getBytes()); 075 076 int transcriptIndex = 0; 077 for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) { 078 transcriptIndex++; 079 080 gff3line = key + "\t" + transcriptSequence.getSource() + "\t" + "mRNA" + "\t" + transcriptSequence.getBioBegin() + "\t" + transcriptSequence.getBioEnd() + "\t"; 081 score = transcriptSequence.getSequenceScore(); 082 if (score == null) { 083 gff3line = gff3line + ".\t"; 084 } else { 085 gff3line = gff3line + score + "\t"; 086 } 087 gff3line = gff3line + transcriptSequence.getStrand().getStringRepresentation() + "\t"; 088 gff3line = gff3line + ".\t"; 089 String id = geneSequence.getAccession().getID() + "." + transcriptIndex; 090 gff3line = gff3line + "ID=" + id + ";Parent=" + geneSequence.getAccession().getID() + ";Name=" + id; 091 gff3line = gff3line + getGFF3Note(transcriptSequence.getNotesList()); 092 093 gff3line = gff3line + "\n"; 094 outputStream.write(gff3line.getBytes()); 095 096 String transcriptParentName = geneSequence.getAccession().getID() + "." + transcriptIndex; 097 ArrayList<CDSSequence> cdsSequenceList = new ArrayList<CDSSequence>(transcriptSequence.getCDSSequences().values()); 098 Collections.sort(cdsSequenceList, new SequenceComparator()); 099 for (CDSSequence cdsSequence : cdsSequenceList) { 100 gff3line = key + "\t" + cdsSequence.getSource() + "\t" + "CDS" + "\t" + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t"; 101 score = cdsSequence.getSequenceScore(); 102 if (score == null) { 103 gff3line = gff3line + ".\t"; 104 } else { 105 gff3line = gff3line + score + "\t"; 106 } 107 gff3line = gff3line + cdsSequence.getStrand().getStringRepresentation() + "\t"; 108 gff3line = gff3line + cdsSequence.getPhase() + "\t"; 109 gff3line = gff3line + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptParentName; 110 gff3line = gff3line + getGFF3Note(cdsSequence.getNotesList()); 111 112 gff3line = gff3line + "\n"; 113 outputStream.write(gff3line.getBytes()); 114 } 115 116 } 117 } 118 119 } 120 121 122 } 123 124 private String getGFF3Note(ArrayList<String> notesList) { 125 String notes = ""; 126 127 if (notesList.size() > 0) { 128 notes = ";Note="; 129 int noteindex = 1; 130 for (String note : notesList) { 131 notes = notes + note; 132 if (noteindex < notesList.size() - 1) { 133 notes = notes + " "; 134 } 135 } 136 137 } 138 return notes; 139 } 140 141 public static void main(String[] args) throws Exception { 142 143 if (true) { 144 FileOutputStream fo = new FileOutputStream("/Users/Scooter/scripps/dyadic/geneid/geneid/c1-geneid.gff3");//-16 145 LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGeneIDGFF2(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"), new File("/Users/Scooter/scripps/dyadic/geneid/geneid/c1_geneid.gff")); 146 GFF3Writer gff3Writer = new GFF3Writer(); 147 gff3Writer.write(fo, dnaSequenceList); 148 149 150 // LinkedHashMap<String, ProteinSequence> proteinSequenceList = GeneFeatureHelper.getProteinSequences(chromosomeSequenceList.values()); 151 // for(String id : proteinSequenceList.keySet()){ 152 // ProteinSequence sequence = proteinSequenceList.get(id); 153 // System.out.println(id + " " + sequence.getSequenceAsString()); 154 155 // } 156 fo.close(); 157 } 158/* 159 if (false) { 160 FileOutputStream fo = new FileOutputStream("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/genemark_hmm.gff3");//-16 161 LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGeneMarkGTF(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"), new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/genemark_hmm.gtf")); 162 GFF3Writer gff3Writer = new GFF3Writer(); 163 gff3Writer.write(fo, dnaSequenceList); 164 fo.close(); 165 } 166 167 if (false) { 168 LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGlimmerGFF3(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds-16.fna"), new File("/Users/Scooter/scripps/dyadic/GlimmerHMM/c1_glimmerhmm-16.gff")); 169 GFF3Writer gff3Writer = new GFF3Writer(); 170 gff3Writer.write(System.out, dnaSequenceList); 171 } 172 */ 173// System.out.println(listGenes); 174 // GeneMarkGTF.write( list, args[1] ); 175 } 176}