001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023import org.biojava.nbio.core.sequence.*;
024
025import java.io.OutputStream;
026import java.util.ArrayList;
027import java.util.Collections;
028import java.util.LinkedHashMap;
029import java.util.List;
030import java.util.Map;
031
032/**
033 *
034 * @author Scooter Willis 
035 */
036public class GFF3Writer {
037
038        /**
039         * Output gff3 format for a DNA Sequence
040         * @param outputStream
041         * @param chromosomeSequenceList
042         * @throws Exception
043         */
044        public void write(OutputStream outputStream, Map<String, ChromosomeSequence> chromosomeSequenceList) throws Exception {
045
046                outputStream.write("##gff-version 3\n".getBytes());
047                for (String key : chromosomeSequenceList.keySet()) {
048                        ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(key);
049                        String gff3line = "";
050        //         if(source.length() == 0){
051        //             Collection<GeneSequence> genes = chromosomeSequence.getGeneSequences().values();
052        //             for(GeneSequence gene : genes){
053        //                 source = gene.getSource();
054        //                 break;
055        //             }
056        //         }
057        //         gff3line = key + "\t" + source + "\t" + "size" + "\t" + "1" + "\t" + chromosomeSequence.getBioEnd() + "\t.\t.\t.\tName=" + key + "\r\n";
058        //         outputStream.write(gff3line.getBytes());
059
060                        for (GeneSequence geneSequence : chromosomeSequence.getGeneSequences().values()) {
061                                gff3line = key + "\t" + geneSequence.getSource() + "\t" + "gene" + "\t" + geneSequence.getBioBegin() + "\t" + geneSequence.getBioEnd() + "\t";
062                                Double score = geneSequence.getSequenceScore();
063                                if (score == null) {
064                                        gff3line = gff3line + ".\t";
065                                } else {
066                                        gff3line = gff3line + score + "\t";
067                                }
068                                gff3line = gff3line + geneSequence.getStrand().getStringRepresentation() + "\t";
069                                gff3line = gff3line + ".\t";
070                                gff3line = gff3line + "ID=" + geneSequence.getAccession().getID() + ";Name=" + geneSequence.getAccession().getID();
071                                gff3line = gff3line + getGFF3Note(geneSequence.getNotesList());
072                                gff3line = gff3line + "\n";
073                                outputStream.write(gff3line.getBytes());
074
075                                int transcriptIndex = 0;
076                                for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) {
077                                        transcriptIndex++;
078
079                                        gff3line = key + "\t" + transcriptSequence.getSource() + "\t" + "mRNA" + "\t" + transcriptSequence.getBioBegin() + "\t" + transcriptSequence.getBioEnd() + "\t";
080                                        score = transcriptSequence.getSequenceScore();
081                                        if (score == null) {
082                                                gff3line = gff3line + ".\t";
083                                        } else {
084                                                gff3line = gff3line + score + "\t";
085                                        }
086                                        gff3line = gff3line + transcriptSequence.getStrand().getStringRepresentation() + "\t";
087                                        gff3line = gff3line + ".\t";
088                                        String id = geneSequence.getAccession().getID() + "." + transcriptIndex;
089                                        gff3line = gff3line + "ID=" + id + ";Parent=" + geneSequence.getAccession().getID() + ";Name=" + id;
090                                        gff3line = gff3line + getGFF3Note(transcriptSequence.getNotesList());
091
092                                        gff3line = gff3line + "\n";
093                                        outputStream.write(gff3line.getBytes());
094
095                                        String transcriptParentName = geneSequence.getAccession().getID() + "." + transcriptIndex;
096                                        ArrayList<CDSSequence> cdsSequenceList = new ArrayList<>(transcriptSequence.getCDSSequences().values());
097                                        Collections.sort(cdsSequenceList, new SequenceComparator());
098                                        for (CDSSequence cdsSequence : cdsSequenceList) {
099                                                gff3line = key + "\t" + cdsSequence.getSource() + "\t" + "CDS" + "\t" + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t";
100                                                score = cdsSequence.getSequenceScore();
101                                                if (score == null) {
102                                                        gff3line = gff3line + ".\t";
103                                                } else {
104                                                        gff3line = gff3line + score + "\t";
105                                                }
106                                                gff3line = gff3line + cdsSequence.getStrand().getStringRepresentation() + "\t";
107                                                gff3line = gff3line + cdsSequence.getPhase() + "\t";
108                                                gff3line = gff3line + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptParentName;
109                                                gff3line = gff3line + getGFF3Note(cdsSequence.getNotesList());
110
111                                                gff3line = gff3line + "\n";
112                                                outputStream.write(gff3line.getBytes());
113                                        }
114
115                                }
116                        }
117
118                }
119
120        }
121
122        private String getGFF3Note(List<String> notesList) {
123                String notes = "";
124
125                if (notesList.size() > 0) {
126                        notes = ";Note=";
127                        int noteindex = 1;
128                        for (String note : notesList) {
129                                notes = notes + note;
130                                if (noteindex < notesList.size() - 1) {
131                                        notes = notes + " ";
132                                }
133                        }
134
135                }
136                return notes;
137        }
138
139}