001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023import org.biojava.nbio.genome.GeneFeatureHelper;
024import org.biojava.nbio.core.sequence.*;
025
026import java.io.File;
027import java.io.FileOutputStream;
028import java.io.OutputStream;
029import java.util.ArrayList;
030import java.util.Collections;
031import java.util.LinkedHashMap;
032
033/**
034 *
035 * @author Scooter Willis <willishf at gmail dot com>
036 */
037public class GFF3Writer {
038
039        /**
040         * Output gff3 format for a DNA Sequence
041         * @param fileName
042         * @param chromosomeSequence
043         * @throws Exception
044         */
045        public void write(OutputStream outputStream, LinkedHashMap<String, ChromosomeSequence> chromosomeSequenceList) throws Exception {
046
047                outputStream.write("##gff-version 3\n".getBytes());
048                for (String key : chromosomeSequenceList.keySet()) {
049                        ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(key);
050                        String gff3line = "";
051        //         if(source.length() == 0){
052        //             Collection<GeneSequence> genes = chromosomeSequence.getGeneSequences().values();
053        //             for(GeneSequence gene : genes){
054        //                 source = gene.getSource();
055        //                 break;
056        //             }
057        //         }
058        //         gff3line = key + "\t" + source + "\t" + "size" + "\t" + "1" + "\t" + chromosomeSequence.getBioEnd() + "\t.\t.\t.\tName=" + key + "\r\n";
059        //         outputStream.write(gff3line.getBytes());
060
061                        for (GeneSequence geneSequence : chromosomeSequence.getGeneSequences().values()) {
062                                gff3line = key + "\t" + geneSequence.getSource() + "\t" + "gene" + "\t" + geneSequence.getBioBegin() + "\t" + geneSequence.getBioEnd() + "\t";
063                                Double score = geneSequence.getSequenceScore();
064                                if (score == null) {
065                                        gff3line = gff3line + ".\t";
066                                } else {
067                                        gff3line = gff3line + score + "\t";
068                                }
069                                gff3line = gff3line + geneSequence.getStrand().getStringRepresentation() + "\t";
070                                gff3line = gff3line + ".\t";
071                                gff3line = gff3line + "ID=" + geneSequence.getAccession().getID() + ";Name=" + geneSequence.getAccession().getID();
072                                gff3line = gff3line + getGFF3Note(geneSequence.getNotesList());
073                                gff3line = gff3line + "\n";
074                                outputStream.write(gff3line.getBytes());
075
076                                int transcriptIndex = 0;
077                                for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) {
078                                        transcriptIndex++;
079
080                                        gff3line = key + "\t" + transcriptSequence.getSource() + "\t" + "mRNA" + "\t" + transcriptSequence.getBioBegin() + "\t" + transcriptSequence.getBioEnd() + "\t";
081                                        score = transcriptSequence.getSequenceScore();
082                                        if (score == null) {
083                                                gff3line = gff3line + ".\t";
084                                        } else {
085                                                gff3line = gff3line + score + "\t";
086                                        }
087                                        gff3line = gff3line + transcriptSequence.getStrand().getStringRepresentation() + "\t";
088                                        gff3line = gff3line + ".\t";
089                                        String id = geneSequence.getAccession().getID() + "." + transcriptIndex;
090                                        gff3line = gff3line + "ID=" + id + ";Parent=" + geneSequence.getAccession().getID() + ";Name=" + id;
091                                        gff3line = gff3line + getGFF3Note(transcriptSequence.getNotesList());
092
093                                        gff3line = gff3line + "\n";
094                                        outputStream.write(gff3line.getBytes());
095
096                                        String transcriptParentName = geneSequence.getAccession().getID() + "." + transcriptIndex;
097                                        ArrayList<CDSSequence> cdsSequenceList = new ArrayList<CDSSequence>(transcriptSequence.getCDSSequences().values());
098                                        Collections.sort(cdsSequenceList, new SequenceComparator());
099                                        for (CDSSequence cdsSequence : cdsSequenceList) {
100                                                gff3line = key + "\t" + cdsSequence.getSource() + "\t" + "CDS" + "\t" + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t";
101                                                score = cdsSequence.getSequenceScore();
102                                                if (score == null) {
103                                                        gff3line = gff3line + ".\t";
104                                                } else {
105                                                        gff3line = gff3line + score + "\t";
106                                                }
107                                                gff3line = gff3line + cdsSequence.getStrand().getStringRepresentation() + "\t";
108                                                gff3line = gff3line + cdsSequence.getPhase() + "\t";
109                                                gff3line = gff3line + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptParentName;
110                                                gff3line = gff3line + getGFF3Note(cdsSequence.getNotesList());
111
112                                                gff3line = gff3line + "\n";
113                                                outputStream.write(gff3line.getBytes());
114                                        }
115
116                                }
117                        }
118
119                }
120
121
122        }
123
124        private String getGFF3Note(ArrayList<String> notesList) {
125                String notes = "";
126
127                if (notesList.size() > 0) {
128                        notes = ";Note=";
129                        int noteindex = 1;
130                        for (String note : notesList) {
131                                notes = notes + note;
132                                if (noteindex < notesList.size() - 1) {
133                                        notes = notes + " ";
134                                }
135                        }
136
137                }
138                return notes;
139        }
140
141        public static void main(String args[]) throws Exception {
142
143                if (true) {
144                        FileOutputStream fo = new FileOutputStream("/Users/Scooter/scripps/dyadic/geneid/geneid/c1-geneid.gff3");//-16
145                        LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGeneIDGFF2(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"), new File("/Users/Scooter/scripps/dyadic/geneid/geneid/c1_geneid.gff"));
146                        GFF3Writer gff3Writer = new GFF3Writer();
147                        gff3Writer.write(fo, dnaSequenceList);
148
149
150         //       LinkedHashMap<String, ProteinSequence> proteinSequenceList = GeneFeatureHelper.getProteinSequences(chromosomeSequenceList.values());
151         //       for(String id : proteinSequenceList.keySet()){
152         //           ProteinSequence sequence = proteinSequenceList.get(id);
153         //           System.out.println(id + " " + sequence.getSequenceAsString());
154
155         //       }
156                        fo.close();
157                }
158/*
159                if (false) {
160                        FileOutputStream fo = new FileOutputStream("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/genemark_hmm.gff3");//-16
161                        LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGeneMarkGTF(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"), new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/genemark_hmm.gtf"));
162                        GFF3Writer gff3Writer = new GFF3Writer();
163                        gff3Writer.write(fo, dnaSequenceList);
164                        fo.close();
165                }
166
167                if (false) {
168                        LinkedHashMap<String, ChromosomeSequence> dnaSequenceList = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGlimmerGFF3(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds-16.fna"), new File("/Users/Scooter/scripps/dyadic/GlimmerHMM/c1_glimmerhmm-16.gff"));
169                        GFF3Writer gff3Writer = new GFF3Writer();
170                        gff3Writer.write(System.out, dnaSequenceList);
171                }
172                */
173//        System.out.println(listGenes);
174                //      GeneMarkGTF.write( list, args[1] );
175        }
176}