/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
021package org.biojava.nbio.genome.query;
022
023
024import org.biojava.nbio.core.util.XMLHelper;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027import org.w3c.dom.Document;
028import org.w3c.dom.Element;
029
030import java.util.ArrayList;
031import java.util.LinkedHashMap;
032import java.util.List;
033
034/**
035 *
036 * @author Scooter Willis 
037 */
038public class BlastXMLQuery {
039
040        private static final Logger logger = LoggerFactory.getLogger(BlastXMLQuery.class);
041
042        Document blastDoc = null;
043
044        public BlastXMLQuery(String blastFile) throws Exception {
045                logger.info("Start read of {}", blastFile);
046                blastDoc = XMLHelper.loadXML(blastFile);
047                logger.info("Read finished");
048        }
049
050        public LinkedHashMap<String, ArrayList<String>> getHitsQueryDef(double maxEScore) throws Exception {
051                LinkedHashMap<String, ArrayList<String>> hitsHashMap = new LinkedHashMap<>();
052                logger.info("Query for hits");
053                List<Element> elementList = XMLHelper.selectElements(blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]");
054                logger.info("{} hits", elementList.size());
055
056                for (Element element : elementList) {
057                        Element iterationquerydefElement = XMLHelper.selectSingleElement(element, "Iteration_query-def");
058                        String querydef = iterationquerydefElement.getTextContent();
059                        Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits");
060                        List<Element> hitList = XMLHelper.selectElements(iterationHitsElement, "Hit");
061                        for (Element hitElement : hitList) {
062                                Element hitaccessionElement = XMLHelper.selectSingleElement(hitElement, "Hit_accession");
063                                String hitaccession = hitaccessionElement.getTextContent();
064                                Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps");
065                                List<Element> hspList = XMLHelper.selectElements(hithspsElement, "Hsp");
066                                for (Element hspElement : hspList) {
067                                        Element evalueElement = XMLHelper.selectSingleElement(hspElement, "Hsp_evalue");
068                                        String value = evalueElement.getTextContent();
069                                        double evalue = Double.parseDouble(value);
070                                        if (evalue <= maxEScore) {
071                                                ArrayList<String> hits = hitsHashMap.get(querydef);
072                                                if (hits == null) {
073                                                        hits = new ArrayList<>();
074                                                        hitsHashMap.put(querydef, hits);
075                                                }
076                                                hits.add(hitaccession);
077                                        }
078                                }
079                        }
080                }
081
082                return hitsHashMap;
083        }
084
085        public static void main(String[] args) {
086                try {
087                        BlastXMLQuery blastXMLQuery = new BlastXMLQuery("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/c1-454Scaffolds-hits-uniprot_fungi.xml");
088                        LinkedHashMap<String, ArrayList<String>> hits = blastXMLQuery.getHitsQueryDef(1E-10);
089                        logger.info("Hits: {}", hits);
090                } catch (Exception e) {
091                        logger.error("Execution: ", e);
092                }
093        }
094}