001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.query; 022 023import org.biojava.nbio.genome.parsers.gff.Feature; 024import org.biojava.nbio.genome.parsers.gff.FeatureI; 025import org.biojava.nbio.genome.parsers.gff.FeatureList; 026import org.biojava.nbio.genome.parsers.gff.GeneMarkGTFReader; 027import org.slf4j.Logger; 028import org.slf4j.LoggerFactory; 029 030import java.io.File; 031import java.util.ArrayList; 032import java.util.LinkedHashMap; 033 034/** 035 * 036 * @author Scooter Willis 037 */ 038public class OutputHitsGFF { 039 040 private static final Logger logger = LoggerFactory.getLogger(OutputHitsGFF.class); 041 042 public void process(File blastXMLFile, File gffFile, File gffOutputFile, double maxEScore, double percentageAligned, boolean includeFrameShift, boolean includeNegativeStrand) throws Exception { 043 BlastXMLQuery blastXMLQuery = new BlastXMLQuery(blastXMLFile.getAbsolutePath()); 044 LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(maxEScore); 045 FeatureList listGenes = GeneMarkGTFReader.read(gffFile.getAbsolutePath()); 046 FeatureList hitGenes = new FeatureList(); 047 for (String id : hits.keySet()) { 048 String[] values = id.split(" "); 049 String gene_id = values[0]; 050 FeatureList gene = listGenes.selectByAttribute("gene_id", gene_id); 051 for (FeatureI geneFeature : gene) { 052 053 if (!includeNegativeStrand && geneFeature.location().isNegative()) { 054 continue; 055 } 056 if (!includeFrameShift) { 057 boolean frameShift = false; 058 FeatureList cdsList = gene.selectByType("CDS"); 059 for(FeatureI cdsFeature : cdsList){ 060 int frame = ((Feature)cdsFeature).frame(); 061 if(frame != 0){ 062 frameShift = true; 063 break; 064 } 065 } 066 if(frameShift) 067 continue; 068 } 069 hitGenes.add(geneFeature); 070 } 071 } 072 073 // GeneMarkGTFReader.write(hitGenes, gffOutputFile.getAbsolutePath()); 074 } 075 076 077 public static void main(String[] args) { 078 try { 079 OutputHitsGFF outputHitsGFF = new OutputHitsGFF(); 080 outputHitsGFF.process(new File("hits-uniprot_fungi.xml"), 081 new File("genemark_hmm.gtf"), 082 new File("genemark_hits_hmm.gtf"), 0, 100, true, true); 083 084 085 } catch (Exception e) { 086 logger.error("Execution: ", e); 087 } 088 } 089}