001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.util; 022 023import org.w3c.dom.Document; 024import org.w3c.dom.Element; 025import org.w3c.dom.Node; 026import org.w3c.dom.NodeList; 027import org.xml.sax.SAXException; 028 029import javax.xml.parsers.DocumentBuilder; 030import javax.xml.parsers.DocumentBuilderFactory; 031import javax.xml.parsers.ParserConfigurationException; 032import javax.xml.transform.Transformer; 033import javax.xml.transform.TransformerException; 034import javax.xml.transform.TransformerFactory; 035import javax.xml.transform.dom.DOMSource; 036import javax.xml.transform.stream.StreamResult; 037import javax.xml.XMLConstants; 038import javax.xml.xpath.XPath; 039import javax.xml.xpath.XPathConstants; 040import javax.xml.xpath.XPathExpressionException; 041import javax.xml.xpath.XPathFactory; 042import java.io.*; 043import java.util.ArrayList; 044 045import static org.biojava.nbio.core.sequence.io.util.IOUtils.close; 046import static org.biojava.nbio.core.sequence.io.util.IOUtils.openFile; 047import java.util.List; 048 049/** 050 * Helper methods to simplify boilerplate XML parsing code for {@code}org.w3c.dom{@code} XML objects 051 * @author Scooter 052 */ 053public class XMLHelper { 054 055 /** 056 * Creates a new element called {@code}elementName{@code} and adds it to {@code}parentElement{@code} 057 * @param parentElement 058 * @param elementName 059 * @return the new child element 060 */ 061 public static Element addChildElement(Element parentElement, String elementName) { 062 Element childElement = parentElement.getOwnerDocument().createElement(elementName); 063 parentElement.appendChild(childElement); 064 return childElement; 065 } 066 067 /** 068 * Create a new, empty {@code}org.w3c.dom.Document{@code} 069 * @return a new {@code}org.w3c.dom.Document{@code} 070 * @throws ParserConfigurationException 071 */ 072 public static Document getNewDocument() throws ParserConfigurationException { 073 074 //Create instance of DocumentBuilderFactory 075 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 076 //Get the DocumentBuilder 077 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 078 //Create blank DOM Document 079 Document doc = docBuilder.newDocument(); 080 return doc; 081 } 082 083 /** 084 * Given a path to an XML file, parses into an {@code}org.w3c.dom.Document{@code} 085 * @param fileName path to a readable XML file 086 * @return 087 * @throws SAXException 088 * @throws IOException 089 * @throws ParserConfigurationException 090 */ 091 public static Document loadXML(String fileName) throws SAXException, IOException, ParserConfigurationException { 092 InputStream is = openFile(new File(fileName)); 093 Document doc = inputStreamToDocument(new BufferedInputStream(is)); 094 close(is); 095 return doc; 096 } 097 098 /** 099 * Creates an {@code}org.w3c.dom.Document{@code} from the content of the {@code}inputStream{@code} 100 * @param inputStream 101 * @return a {@code}Document{@code} 102 * @throws SAXException 103 * @throws IOException 104 * @throws ParserConfigurationException 105 */ 106 public static Document inputStreamToDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException { 107 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 108 109 DocumentBuilder db = dbf.newDocumentBuilder(); 110 dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 111 112 Document doc = db.parse(inputStream); 113 doc.getDocumentElement().normalize(); 114 115 return doc; 116 } 117 118 /** 119 * Given an {@code}org.w3c.dom.Document{@code}, writes it to the given {@code}outputStream{@code} 120 * @param document 121 * @param outputStream 122 * @throws TransformerException 123 */ 124 public static void outputToStream(Document document, OutputStream outputStream) throws TransformerException { 125 // Use a Transformer for output 126 TransformerFactory tFactory = TransformerFactory.newInstance(); 127 Transformer transformer = tFactory.newTransformer(); 128 // transformer.setOutputProperty(OutputKeys.INDENT, "yes"); 129 130 DOMSource source = new DOMSource(document); 131 StreamResult result = new StreamResult(outputStream); 132 transformer.transform(source, result); 133 } 134 135 //static XPath xpath = XPathFactory.newInstance().newXPath(); 136 137 /** 138 * Given an element, searches upwards through ancestor Elements till the first Element 139 * matching the requests {@code}parentName{@code} is found. 140 * @param element The starting element 141 * @param parentName The tag name of the requested Element. 142 * @return The found element, or {@code}null{@code} if no matching element is found, 143 */ 144 public static Element selectParentElement(Element element, String parentName) { 145 146 Node parentNode = element.getParentNode(); 147 if (parentNode == null) { 148 return null; 149 } 150 // check that parent is actually an element, else return null 151 // this is to prevent ClassCastExceptions if element's parent is not an Element. 152 Element parentElement = null; 153 if (Node.ELEMENT_NODE == parentNode.getNodeType()){ 154 parentElement = (Element)parentNode; 155 } else { 156 return null; 157 } 158 if (parentElement.getTagName().equals(parentName)) { 159 return parentElement; 160 } 161 return selectParentElement(parentElement, parentName); 162 } 163 164 /** 165 * If {@code}xpathExpression{@code} is a plain string with no '/' characterr, this is 166 * interpreted as a child element name to search for. 167 * <p> 168 * If {@code}xpathExpression{@code} is an XPath expression, this is evaluated and is assumed 169 * to identify a single element. 170 * @param element 171 * @param xpathExpression 172 * @return A single element or null if no match or the 1st match if matches more than 1 173 * @throws XPathExpressionException 174 */ 175 public static Element selectSingleElement(Element element, String xpathExpression) throws XPathExpressionException { 176 if (element == null) { 177 return null; 178 } 179 if (xpathExpression.indexOf("/") == -1) { 180 NodeList nodeList = element.getChildNodes(); 181 for (int i = 0; i < nodeList.getLength(); i++) { 182 Node node = nodeList.item(i); 183 if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) { 184 return (Element) node; 185 } 186 } 187 // NodeList nodes = element.getElementsByTagName(xpathExpression); 188 // if (nodes.getLength() > 0) { 189 // return (Element) nodes.item(0); 190 // } else { 191 return null; 192 // } 193 } else { 194 XPath xpath = XPathFactory.newInstance().newXPath(); 195 Element node = (Element) xpath.evaluate(xpathExpression, element, XPathConstants.NODE); 196 return node; 197 } 198 } 199 200 /** 201 * Gets a list of elements matching {@code}xpathExpression{@code}. If xpathExpression lacks 202 * a '/' character, only immediate children o {@code}element{@code} are searched over. 203 * <br/> 204 * If {@code}xpathExpression{@code} contains an '/' character, a full XPath search is made 205 * @param element 206 * @param xpathExpression 207 * @return A possibly empty but non-null {@code}ArrayList{@code} 208 * @throws XPathExpressionException 209 */ 210 public static List<Element> selectElements(Element element, String xpathExpression) throws XPathExpressionException { 211 List<Element> resultVector = new ArrayList<>(); 212 if (element == null) { 213 return resultVector; 214 } 215 if (xpathExpression.indexOf("/") == -1) { 216 NodeList nodeList = element.getChildNodes(); 217 for (int i = 0; i < nodeList.getLength(); i++) { 218 Node node = nodeList.item(i); 219 if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) { 220 resultVector.add((Element) node); 221 } 222 } 223 } else { 224 XPath xpath = XPathFactory.newInstance().newXPath(); 225 NodeList nodes = (NodeList) xpath.evaluate(xpathExpression, element, XPathConstants.NODESET); 226 227 228 for (int i = 0; i < nodes.getLength(); i++) { 229 Node node = nodes.item(i); 230 resultVector.add((Element) node); 231 } 232 } 233 return resultVector; 234 } 235}