001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.query; 022 023 024import org.biojava.nbio.core.util.XMLHelper; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027import org.w3c.dom.Document; 028import org.w3c.dom.Element; 029 030import java.util.ArrayList; 031import java.util.LinkedHashMap; 032 033/** 034 * 035 * @author Scooter Willis <willishf at gmail dot com> 036 */ 037public class BlastXMLQuery { 038 039 private static final Logger logger = LoggerFactory.getLogger(BlastXMLQuery.class); 040 041 Document blastDoc = null; 042 043 public BlastXMLQuery(String blastFile) throws Exception { 044 logger.info("Start read of {}", blastFile); 045 blastDoc = XMLHelper.loadXML(blastFile); 046 logger.info("Read finished"); 047 } 048 049 public LinkedHashMap<String, ArrayList<String>> getHitsQueryDef(double maxEScore) throws Exception { 050 LinkedHashMap<String, ArrayList<String>> hitsHashMap = new LinkedHashMap<String, ArrayList<String>>(); 051 logger.info("Query for hits"); 052 ArrayList<Element> elementList = XMLHelper.selectElements(blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]"); 053 logger.info("{} hits", elementList.size()); 054 055 for (Element element : elementList) { 056 Element iterationquerydefElement = XMLHelper.selectSingleElement(element, "Iteration_query-def"); 057 String querydef = iterationquerydefElement.getTextContent(); 058 Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits"); 059 ArrayList<Element> hitList = XMLHelper.selectElements(iterationHitsElement, "Hit"); 060 for (Element hitElement : hitList) { 061 Element hitaccessionElement = XMLHelper.selectSingleElement(hitElement, "Hit_accession"); 062 String hitaccession = hitaccessionElement.getTextContent(); 063 Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps"); 064 ArrayList<Element> hspList = XMLHelper.selectElements(hithspsElement, "Hsp"); 065 for (Element hspElement : hspList) { 066 Element evalueElement = XMLHelper.selectSingleElement(hspElement, "Hsp_evalue"); 067 String value = evalueElement.getTextContent(); 068 double evalue = Double.parseDouble(value); 069 if (evalue <= maxEScore) { 070 ArrayList<String> hits = hitsHashMap.get(querydef); 071 if (hits == null) { 072 hits = new ArrayList<String>(); 073 hitsHashMap.put(querydef, hits); 074 } 075 hits.add(hitaccession); 076 } 077 } 078 } 079 } 080 081 return hitsHashMap; 082 } 083 084 public static void main(String[] args) { 085 try { 086 BlastXMLQuery blastXMLQuery = new BlastXMLQuery("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/c1-454Scaffolds-hits-uniprot_fungi.xml"); 087 LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(1E-10); 088 logger.info("Hits: {}", hits); 089 } catch (Exception e) { 090 logger.error("Execution: ", e); 091 } 092 } 093}