001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.util;
022
023import org.w3c.dom.Document;
024import org.w3c.dom.Element;
025import org.w3c.dom.Node;
026import org.w3c.dom.NodeList;
027import org.xml.sax.SAXException;
028
029import javax.xml.parsers.DocumentBuilder;
030import javax.xml.parsers.DocumentBuilderFactory;
031import javax.xml.parsers.ParserConfigurationException;
032import javax.xml.transform.Transformer;
033import javax.xml.transform.TransformerException;
034import javax.xml.transform.TransformerFactory;
035import javax.xml.transform.dom.DOMSource;
036import javax.xml.transform.stream.StreamResult;
037import javax.xml.XMLConstants;
038import javax.xml.xpath.XPath;
039import javax.xml.xpath.XPathConstants;
040import javax.xml.xpath.XPathExpressionException;
041import javax.xml.xpath.XPathFactory;
042import java.io.*;
043import java.util.ArrayList;
044
045import static org.biojava.nbio.core.sequence.io.util.IOUtils.close;
046import static org.biojava.nbio.core.sequence.io.util.IOUtils.openFile;
047import java.util.List;
048
049/**
050 * Helper methods to simplify boilerplate XML parsing code for  {@code}org.w3c.dom{@code} XML objects
051 * @author Scooter
052 */
053public class XMLHelper {
054
055        /**
056         * Creates a new element called {@code}elementName{@code} and adds it to {@code}parentElement{@code}
057         * @param parentElement
058         * @param elementName
059         * @return the new child element
060         */
061        public static Element addChildElement(Element parentElement, String elementName) {
062                Element childElement = parentElement.getOwnerDocument().createElement(elementName);
063                parentElement.appendChild(childElement);
064                return childElement;
065        }
066
067        /**
068         * Create a new, empty {@code}org.w3c.dom.Document{@code}
069         * @return a new {@code}org.w3c.dom.Document{@code}
070         * @throws ParserConfigurationException
071         */
072        public static Document getNewDocument() throws ParserConfigurationException  {
073
074                //Create instance of DocumentBuilderFactory
075                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
076                //Get the DocumentBuilder
077                DocumentBuilder docBuilder = factory.newDocumentBuilder();
078                //Create blank DOM Document
079                Document doc = docBuilder.newDocument();
080                return doc;
081        }
082
083        /**
084         * Given a path to an XML file, parses into an {@code}org.w3c.dom.Document{@code} 
085         * @param fileName path to a readable XML file
086         * @return
087         * @throws SAXException
088         * @throws IOException
089         * @throws ParserConfigurationException
090         */
091        public static Document loadXML(String fileName) throws SAXException, IOException, ParserConfigurationException  {
092                InputStream is = openFile(new File(fileName));
093                Document doc = inputStreamToDocument(new BufferedInputStream(is));
094                close(is);
095                return doc;
096        }
097
098        /**
099         * Creates an {@code}org.w3c.dom.Document{@code} from the content of the {@code}inputStream{@code}
100         * @param inputStream
101         * @return a {@code}Document{@code}
102         * @throws SAXException
103         * @throws IOException
104         * @throws ParserConfigurationException
105         */
106        public static Document inputStreamToDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException  {
107                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
108
109                DocumentBuilder db = dbf.newDocumentBuilder();
110                dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
111
112                Document doc = db.parse(inputStream);
113                doc.getDocumentElement().normalize();
114
115                return doc;
116        }
117
118        /**
119         * Given an {@code}org.w3c.dom.Document{@code}, writes it to the given {@code}outputStream{@code}
120         * @param document
121         * @param outputStream
122         * @throws TransformerException
123         */
124        public static void outputToStream(Document document, OutputStream outputStream) throws TransformerException {
125                // Use a Transformer for output
126                TransformerFactory tFactory = TransformerFactory.newInstance();
127                Transformer transformer = tFactory.newTransformer();
128                //    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
129
130                DOMSource source = new DOMSource(document);
131                StreamResult result = new StreamResult(outputStream);
132                transformer.transform(source, result);
133        }
134
135        //static XPath xpath = XPathFactory.newInstance().newXPath();
136
137        /**
138         * Given an element, searches upwards through ancestor Elements till the first Element
139         * matching the requests {@code}parentName{@code} is found.
140         * @param element The starting element
141         * @param parentName The tag name of the requested Element.
142         * @return The found element, or {@code}null{@code} if no matching element is found,
143         */
144        public static Element selectParentElement(Element element, String parentName) {
145                
146            Node parentNode =  element.getParentNode();
147                if (parentNode == null) {
148                        return null;
149                }
150                // check that parent is actually an element, else return null
151                // this is to prevent ClassCastExceptions if element's parent is not an Element.
152                Element parentElement = null;
153                if (Node.ELEMENT_NODE == parentNode.getNodeType()){
154                        parentElement = (Element)parentNode;
155                } else {
156                        return null;
157                }
158                if (parentElement.getTagName().equals(parentName)) {
159                        return parentElement;
160                }
161                return selectParentElement(parentElement, parentName);
162        }
163
164        /**
165         * If {@code}xpathExpression{@code} is a plain string with no '/' characterr, this is 
166         * interpreted as a child element name to search for. 
167         * <p>
168         * If {@code}xpathExpression{@code} is an XPath expression, this is evaluated and is assumed
169         * to identify a single element.
170         * @param element
171         * @param xpathExpression
172         * @return A single element or null if no match or the 1st match if matches more than 1
173         * @throws XPathExpressionException
174         */
175        public static Element selectSingleElement(Element element, String xpathExpression) throws XPathExpressionException {
176                if (element == null) {
177                        return null;
178                }
179                if (xpathExpression.indexOf("/") == -1) {
180                        NodeList nodeList = element.getChildNodes();
181                        for (int i = 0; i < nodeList.getLength(); i++) {
182                                Node node = nodeList.item(i);
183                                if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) {
184                                        return (Element) node;
185                                }
186                        }
187                        //  NodeList nodes = element.getElementsByTagName(xpathExpression);
188                        //  if (nodes.getLength() > 0) {
189                        //      return (Element) nodes.item(0);
190                        //  } else {
191                        return null;
192                        //  }
193                } else {
194                        XPath xpath = XPathFactory.newInstance().newXPath();
195                        Element node = (Element) xpath.evaluate(xpathExpression, element, XPathConstants.NODE);
196                        return node;
197                }
198        }
199
200        /**
201         * Gets a list of elements matching {@code}xpathExpression{@code}. If xpathExpression lacks
202         * a '/' character, only immediate children o {@code}element{@code} are searched over.
203         * <br/>
204         * If {@code}xpathExpression{@code} contains an '/' character, a full XPath search is made
205         * @param element
206         * @param xpathExpression
207         * @return A possibly empty but non-null {@code}ArrayList{@code}
208         * @throws XPathExpressionException
209         */
210        public static List<Element> selectElements(Element element, String xpathExpression) throws XPathExpressionException {
211                List<Element> resultVector = new ArrayList<>();
212                if (element == null) {
213                        return resultVector;
214                }
215                if (xpathExpression.indexOf("/") == -1) {
216                        NodeList nodeList = element.getChildNodes();
217                        for (int i = 0; i < nodeList.getLength(); i++) {
218                                Node node = nodeList.item(i);
219                                if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) {
220                                        resultVector.add((Element) node);
221                                }
222                        }
223                } else {
224                        XPath xpath = XPathFactory.newInstance().newXPath();
225                        NodeList nodes = (NodeList) xpath.evaluate(xpathExpression, element, XPathConstants.NODESET);
226
227
228                        for (int i = 0; i < nodes.getLength(); i++) {
229                                Node node = nodes.item(i);
230                                resultVector.add((Element) node);
231                        }
232                }
233                return resultVector;
234        }
235}