001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.query;
022
023
024import org.biojava.nbio.core.util.XMLHelper;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027import org.w3c.dom.Document;
028import org.w3c.dom.Element;
029
030import java.util.ArrayList;
031import java.util.LinkedHashMap;
032
033/**
034 *
035 * @author Scooter Willis <willishf at gmail dot com>
036 */
037public class BlastXMLQuery {
038
039        private static final Logger logger = LoggerFactory.getLogger(BlastXMLQuery.class);
040
041        Document blastDoc = null;
042
043        public BlastXMLQuery(String blastFile) throws Exception {
044                logger.info("Start read of {}", blastFile);
045                blastDoc = XMLHelper.loadXML(blastFile);
046                logger.info("Read finished");
047        }
048
049        public LinkedHashMap<String, ArrayList<String>> getHitsQueryDef(double maxEScore) throws Exception {
050                LinkedHashMap<String, ArrayList<String>> hitsHashMap = new LinkedHashMap<String, ArrayList<String>>();
051                logger.info("Query for hits");
052                ArrayList<Element> elementList = XMLHelper.selectElements(blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]");
053                logger.info("{} hits", elementList.size());
054
055                for (Element element : elementList) {
056                        Element iterationquerydefElement = XMLHelper.selectSingleElement(element, "Iteration_query-def");
057                        String querydef = iterationquerydefElement.getTextContent();
058                        Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits");
059                        ArrayList<Element> hitList = XMLHelper.selectElements(iterationHitsElement, "Hit");
060                        for (Element hitElement : hitList) {
061                                Element hitaccessionElement = XMLHelper.selectSingleElement(hitElement, "Hit_accession");
062                                String hitaccession = hitaccessionElement.getTextContent();
063                                Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps");
064                                ArrayList<Element> hspList = XMLHelper.selectElements(hithspsElement, "Hsp");
065                                for (Element hspElement : hspList) {
066                                        Element evalueElement = XMLHelper.selectSingleElement(hspElement, "Hsp_evalue");
067                                        String value = evalueElement.getTextContent();
068                                        double evalue = Double.parseDouble(value);
069                                        if (evalue <= maxEScore) {
070                                                ArrayList<String> hits = hitsHashMap.get(querydef);
071                                                if (hits == null) {
072                                                        hits = new ArrayList<String>();
073                                                        hitsHashMap.put(querydef, hits);
074                                                }
075                                                hits.add(hitaccession);
076                                        }
077                                }
078                        }
079                }
080
081                return hitsHashMap;
082        }
083
084        public static void main(String[] args) {
085                try {
086                        BlastXMLQuery blastXMLQuery = new BlastXMLQuery("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/c1-454Scaffolds-hits-uniprot_fungi.xml");
087                        LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(1E-10);
088                        logger.info("Hits: {}", hits);
089                } catch (Exception e) {
090                        logger.error("Execution: ", e);
091                }
092        }
093}