001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.query;
022
023import org.biojava.nbio.genome.parsers.gff.Feature;
024import org.biojava.nbio.genome.parsers.gff.FeatureI;
025import org.biojava.nbio.genome.parsers.gff.FeatureList;
026import org.biojava.nbio.genome.parsers.gff.GeneMarkGTFReader;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import java.io.File;
031import java.util.ArrayList;
032import java.util.LinkedHashMap;
033
034/**
035 *
036 * @author Scooter Willis 
037 */
038public class OutputHitsGFF {
039
040        private static final Logger logger = LoggerFactory.getLogger(OutputHitsGFF.class);
041
042        public void process(File blastXMLFile, File gffFile, File gffOutputFile, double maxEScore, double percentageAligned, boolean includeFrameShift, boolean includeNegativeStrand) throws Exception {
043                BlastXMLQuery blastXMLQuery = new BlastXMLQuery(blastXMLFile.getAbsolutePath());
044                LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(maxEScore);
045                FeatureList listGenes = GeneMarkGTFReader.read(gffFile.getAbsolutePath());
046                FeatureList hitGenes = new FeatureList();
047                for (String id : hits.keySet()) {
048                        String[] values = id.split(" ");
049                        String gene_id = values[0];
050                        FeatureList gene = listGenes.selectByAttribute("gene_id", gene_id);
051                        for (FeatureI geneFeature : gene) {
052
053                                if (!includeNegativeStrand && geneFeature.location().isNegative()) {
054                                        continue;
055                                }
056                                if (!includeFrameShift) {
057                                        boolean frameShift = false;
058                                        FeatureList cdsList = gene.selectByType("CDS");
059                                        for(FeatureI cdsFeature : cdsList){
060                                                int frame = ((Feature)cdsFeature).frame();
061                                                if(frame != 0){
062                                                        frameShift = true;
063                                                        break;
064                                                }
065                                        }
066                                        if(frameShift)
067                                                continue;
068                                }
069                                hitGenes.add(geneFeature);
070                        }
071                }
072
073        //    GeneMarkGTFReader.write(hitGenes, gffOutputFile.getAbsolutePath());
074        }
075
076
077                public static void main(String[] args) {
078                try {
079                        OutputHitsGFF outputHitsGFF = new OutputHitsGFF();
080                        outputHitsGFF.process(new File("hits-uniprot_fungi.xml"),
081                                        new File("genemark_hmm.gtf"),
082                                        new File("genemark_hits_hmm.gtf"), 0, 100, true, true);
083
084
085                } catch (Exception e) {
086                        logger.error("Execution: ", e);
087                }
088        }
089}