001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.util;
022
023import org.w3c.dom.Document;
024import org.w3c.dom.Element;
025import org.w3c.dom.Node;
026import org.w3c.dom.NodeList;
027import org.xml.sax.SAXException;
028
029import javax.xml.parsers.DocumentBuilder;
030import javax.xml.parsers.DocumentBuilderFactory;
031import javax.xml.parsers.ParserConfigurationException;
032import javax.xml.transform.Transformer;
033import javax.xml.transform.TransformerException;
034import javax.xml.transform.TransformerFactory;
035import javax.xml.transform.dom.DOMSource;
036import javax.xml.transform.stream.StreamResult;
037import javax.xml.XMLConstants;
038import javax.xml.xpath.XPath;
039import javax.xml.xpath.XPathConstants;
040import javax.xml.xpath.XPathExpressionException;
041import javax.xml.xpath.XPathFactory;
042import java.io.*;
043import java.util.ArrayList;
044
045import static org.biojava.nbio.core.sequence.io.util.IOUtils.close;
046import static org.biojava.nbio.core.sequence.io.util.IOUtils.openFile;
047
048/**
049 * Helper methods to simplify boilerplate XML parsing code for  {@code}org.w3c.dom{@code} XML objects
050 * @author Scooter
051 */
052public class XMLHelper {
053
054        /**
055         * Creates a new element called {@code}elementName{@code} and adds it to {@code}parentElement{@code}
056         * @param parentElement
057         * @param elementName
058         * @return the new child element
059         */
060        public static Element addChildElement(Element parentElement, String elementName) {
061                Element childElement = parentElement.getOwnerDocument().createElement(elementName);
062                parentElement.appendChild(childElement);
063                return childElement;
064        }
065
066        /**
067         * Create a new, empty {@code}org.w3c.dom.Document{@code}
068         * @return a new {@code}org.w3c.dom.Document{@code}
069         * @throws ParserConfigurationException
070         */
071        public static Document getNewDocument() throws ParserConfigurationException  {
072
073                //Create instance of DocumentBuilderFactory
074                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
075                //Get the DocumentBuilder
076                DocumentBuilder docBuilder = factory.newDocumentBuilder();
077                //Create blank DOM Document
078                Document doc = docBuilder.newDocument();
079                return doc;
080        }
081
082        /**
083         * Given a path to an XML file, parses into an {@code}org.w3c.dom.Document{@code} 
084         * @param fileName path to a readable XML file
085         * @return
086         * @throws SAXException
087         * @throws IOException
088         * @throws ParserConfigurationException
089         */
090        public static Document loadXML(String fileName) throws SAXException, IOException, ParserConfigurationException  {
091                InputStream is = openFile(new File(fileName));
092                Document doc = inputStreamToDocument(new BufferedInputStream(is));
093                close(is);
094                return doc;
095        }
096
097        /**
098         * Creates an {@code}org.w3c.dom.Document{@code} from the content of the {@code}inputStream{@code}
099         * @param inputStream
100         * @return a {@code}Document{@code}
101         * @throws SAXException
102         * @throws IOException
103         * @throws ParserConfigurationException
104         */
105        public static Document inputStreamToDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException  {
106                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
107
108                DocumentBuilder db = dbf.newDocumentBuilder();
109                dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
110
111                Document doc = db.parse(inputStream);
112                doc.getDocumentElement().normalize();
113
114                return doc;
115        }
116
117        /**
118         * Given an {@code}org.w3c.dom.Document{@code}, writes it to the given {@code}outputStream{@code}
119         * @param document
120         * @param outputStream
121         * @throws TransformerException
122         */
123        public static void outputToStream(Document document, OutputStream outputStream) throws TransformerException {
124                // Use a Transformer for output
125                TransformerFactory tFactory = TransformerFactory.newInstance();
126                Transformer transformer = tFactory.newTransformer();
127                //    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
128
129                DOMSource source = new DOMSource(document);
130                StreamResult result = new StreamResult(outputStream);
131                transformer.transform(source, result);
132        }
133
134        //static XPath xpath = XPathFactory.newInstance().newXPath();
135
136        /**
137         * Given an element, searches upwards through ancestor Elements till the first Element
138         * matching the requests {@code}parentName{@code} is found.
139         * @param element The starting element
140         * @param parentName The tag name of the requested Element.
141         * @return The found element, or {@code}null{@code} if no matching element is found,
142         */
143        public static Element selectParentElement(Element element, String parentName) {
144                
145            Node parentNode =  element.getParentNode();
146                if (parentNode == null) {
147                        return null;
148                }
149                // check that parent is actually an element, else return null
150                // this is to prevent ClassCastExceptions if element's parent is not an Element.
151                Element parentElement = null;
152                if (Node.ELEMENT_NODE == parentNode.getNodeType()){
153                        parentElement = (Element)parentNode;
154                } else {
155                        return null;
156                }
157                if (parentElement.getTagName().equals(parentName)) {
158                        return parentElement;
159                }
160                return selectParentElement(parentElement, parentName);
161        }
162
163        /**
164         * If {@code}xpathExpression{@code} is a plain string with no '/' characterr, this is 
165         * interpreted as a child element name to search for. 
166         * <b/>
167         * If {@code}xpathExpression{@code} is an XPath expression, this is evaluated and is assumed
168         * to identify a single element.
169         * @param element
170         * @param xpathExpression
171         * @return A single element or null if no match or the 1st match if matches more than 1
172         * @throws XPathExpressionException
173         */
174        public static Element selectSingleElement(Element element, String xpathExpression) throws XPathExpressionException {
175                if (element == null) {
176                        return null;
177                }
178                if (xpathExpression.indexOf("/") == -1) {
179                        NodeList nodeList = element.getChildNodes();
180                        for (int i = 0; i < nodeList.getLength(); i++) {
181                                Node node = nodeList.item(i);
182                                if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) {
183                                        return (Element) node;
184                                }
185                        }
186                        //  NodeList nodes = element.getElementsByTagName(xpathExpression);
187                        //  if (nodes.getLength() > 0) {
188                        //      return (Element) nodes.item(0);
189                        //  } else {
190                        return null;
191                        //  }
192                } else {
193                        XPath xpath = XPathFactory.newInstance().newXPath();
194                        Element node = (Element) xpath.evaluate(xpathExpression, element, XPathConstants.NODE);
195                        return node;
196                }
197        }
198
199        /**
200         * Gets a list of elements matching {@code}xpathExpression{@code}. If xpathExpression lacks
201         * a '/' character, only immediate children o {@code}element{@code} are searched over.
202         * <br/>
203         * If {@code}xpathExpression{@code} contains an '/' character, a full XPath search is made
204         * @param element
205         * @param xpathExpression
206         * @return A possibly empty but non-null {@code}ArrayList{@code}
207         * @throws XPathExpressionException
208         */
209        public static ArrayList<Element> selectElements(Element element, String xpathExpression) throws XPathExpressionException {
210                ArrayList<Element> resultVector = new ArrayList<Element>();
211                if (element == null) {
212                        return resultVector;
213                }
214                if (xpathExpression.indexOf("/") == -1) {
215                        NodeList nodeList = element.getChildNodes();
216                        for (int i = 0; i < nodeList.getLength(); i++) {
217                                Node node = nodeList.item(i);
218                                if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) {
219                                        resultVector.add((Element) node);
220                                }
221                        }
222                } else {
223                        XPath xpath = XPathFactory.newInstance().newXPath();
224                        NodeList nodes = (NodeList) xpath.evaluate(xpathExpression, element, XPathConstants.NODESET);
225
226
227                        for (int i = 0; i < nodes.getLength(); i++) {
228                                Node node = nodes.item(i);
229                                resultVector.add((Element) node);
230                        }
231                }
232                return resultVector;
233        }
234}