001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.query; 022 023 024import org.biojava.nbio.core.util.XMLHelper; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027import org.w3c.dom.Document; 028import org.w3c.dom.Element; 029 030import java.util.ArrayList; 031import java.util.LinkedHashMap; 032import java.util.List; 033 034/** 035 * 036 * @author Scooter Willis 037 */ 038public class BlastXMLQuery { 039 040 private static final Logger logger = LoggerFactory.getLogger(BlastXMLQuery.class); 041 042 Document blastDoc = null; 043 044 public BlastXMLQuery(String blastFile) throws Exception { 045 logger.info("Start read of {}", blastFile); 046 blastDoc = XMLHelper.loadXML(blastFile); 047 logger.info("Read finished"); 048 } 049 050 public LinkedHashMap<String, ArrayList<String>> getHitsQueryDef(double maxEScore) throws Exception { 051 LinkedHashMap<String, ArrayList<String>> hitsHashMap = new LinkedHashMap<>(); 052 logger.info("Query for hits"); 053 List<Element> elementList = XMLHelper.selectElements(blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]"); 054 logger.info("{} hits", elementList.size()); 055 056 for (Element element : elementList) { 057 Element iterationquerydefElement = XMLHelper.selectSingleElement(element, "Iteration_query-def"); 058 String querydef = iterationquerydefElement.getTextContent(); 059 Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits"); 060 List<Element> hitList = XMLHelper.selectElements(iterationHitsElement, "Hit"); 061 for (Element hitElement : hitList) { 062 Element hitaccessionElement = XMLHelper.selectSingleElement(hitElement, "Hit_accession"); 063 String hitaccession = hitaccessionElement.getTextContent(); 064 Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps"); 065 List<Element> hspList = XMLHelper.selectElements(hithspsElement, "Hsp"); 066 for (Element hspElement : hspList) { 067 Element evalueElement = XMLHelper.selectSingleElement(hspElement, "Hsp_evalue"); 068 String value = evalueElement.getTextContent(); 069 double evalue = Double.parseDouble(value); 070 if (evalue <= maxEScore) { 071 ArrayList<String> hits = hitsHashMap.get(querydef); 072 if (hits == null) { 073 hits = new ArrayList<>(); 074 hitsHashMap.put(querydef, hits); 075 } 076 hits.add(hitaccession); 077 } 078 } 079 } 080 } 081 082 return hitsHashMap; 083 } 084 085 public static void main(String[] args) { 086 try { 087 BlastXMLQuery blastXMLQuery = new BlastXMLQuery("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/c1-454Scaffolds-hits-uniprot_fungi.xml"); 088 LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(1E-10); 089 logger.info("Hits: {}", hits); 090 } catch (Exception e) { 091 logger.error("Execution: ", e); 092 } 093 } 094}