001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.util; 022 023import org.w3c.dom.Document; 024import org.w3c.dom.Element; 025import org.w3c.dom.Node; 026import org.w3c.dom.NodeList; 027import org.xml.sax.SAXException; 028 029import javax.xml.parsers.DocumentBuilder; 030import javax.xml.parsers.DocumentBuilderFactory; 031import javax.xml.parsers.ParserConfigurationException; 032import javax.xml.transform.Transformer; 033import javax.xml.transform.TransformerException; 034import javax.xml.transform.TransformerFactory; 035import javax.xml.transform.dom.DOMSource; 036import javax.xml.transform.stream.StreamResult; 037import javax.xml.XMLConstants; 038import javax.xml.xpath.XPath; 039import javax.xml.xpath.XPathConstants; 040import javax.xml.xpath.XPathExpressionException; 041import javax.xml.xpath.XPathFactory; 042import java.io.*; 043import java.util.ArrayList; 044 045import static org.biojava.nbio.core.sequence.io.util.IOUtils.close; 046import static org.biojava.nbio.core.sequence.io.util.IOUtils.openFile; 047 048/** 049 * Helper methods to simplify boilerplate XML parsing code for {@code}org.w3c.dom{@code} XML objects 050 * @author Scooter 051 */ 052public class XMLHelper { 053 054 /** 055 * Creates a new element called {@code}elementName{@code} and adds it to {@code}parentElement{@code} 056 * @param parentElement 057 * @param elementName 058 * @return the new child element 059 */ 060 public static Element addChildElement(Element parentElement, String elementName) { 061 Element childElement = parentElement.getOwnerDocument().createElement(elementName); 062 parentElement.appendChild(childElement); 063 return childElement; 064 } 065 066 /** 067 * Create a new, empty {@code}org.w3c.dom.Document{@code} 068 * @return a new {@code}org.w3c.dom.Document{@code} 069 * @throws ParserConfigurationException 070 */ 071 public static Document getNewDocument() throws ParserConfigurationException { 072 073 //Create instance of DocumentBuilderFactory 074 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 075 //Get the DocumentBuilder 076 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 077 //Create blank DOM Document 078 Document doc = docBuilder.newDocument(); 079 return doc; 080 } 081 082 /** 083 * Given a path to an XML file, parses into an {@code}org.w3c.dom.Document{@code} 084 * @param fileName path to a readable XML file 085 * @return 086 * @throws SAXException 087 * @throws IOException 088 * @throws ParserConfigurationException 089 */ 090 public static Document loadXML(String fileName) throws SAXException, IOException, ParserConfigurationException { 091 InputStream is = openFile(new File(fileName)); 092 Document doc = inputStreamToDocument(new BufferedInputStream(is)); 093 close(is); 094 return doc; 095 } 096 097 /** 098 * Creates an {@code}org.w3c.dom.Document{@code} from the content of the {@code}inputStream{@code} 099 * @param inputStream 100 * @return a {@code}Document{@code} 101 * @throws SAXException 102 * @throws IOException 103 * @throws ParserConfigurationException 104 */ 105 public static Document inputStreamToDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException { 106 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 107 108 DocumentBuilder db = dbf.newDocumentBuilder(); 109 dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 110 111 Document doc = db.parse(inputStream); 112 doc.getDocumentElement().normalize(); 113 114 return doc; 115 } 116 117 /** 118 * Given an {@code}org.w3c.dom.Document{@code}, writes it to the given {@code}outputStream{@code} 119 * @param document 120 * @param outputStream 121 * @throws TransformerException 122 */ 123 public static void outputToStream(Document document, OutputStream outputStream) throws TransformerException { 124 // Use a Transformer for output 125 TransformerFactory tFactory = TransformerFactory.newInstance(); 126 Transformer transformer = tFactory.newTransformer(); 127 // transformer.setOutputProperty(OutputKeys.INDENT, "yes"); 128 129 DOMSource source = new DOMSource(document); 130 StreamResult result = new StreamResult(outputStream); 131 transformer.transform(source, result); 132 } 133 134 //static XPath xpath = XPathFactory.newInstance().newXPath(); 135 136 /** 137 * Given an element, searches upwards through ancestor Elements till the first Element 138 * matching the requests {@code}parentName{@code} is found. 139 * @param element The starting element 140 * @param parentName The tag name of the requested Element. 141 * @return The found element, or {@code}null{@code} if no matching element is found, 142 */ 143 public static Element selectParentElement(Element element, String parentName) { 144 145 Node parentNode = element.getParentNode(); 146 if (parentNode == null) { 147 return null; 148 } 149 // check that parent is actually an element, else return null 150 // this is to prevent ClassCastExceptions if element's parent is not an Element. 151 Element parentElement = null; 152 if (Node.ELEMENT_NODE == parentNode.getNodeType()){ 153 parentElement = (Element)parentNode; 154 } else { 155 return null; 156 } 157 if (parentElement.getTagName().equals(parentName)) { 158 return parentElement; 159 } 160 return selectParentElement(parentElement, parentName); 161 } 162 163 /** 164 * If {@code}xpathExpression{@code} is a plain string with no '/' characterr, this is 165 * interpreted as a child element name to search for. 166 * <b/> 167 * If {@code}xpathExpression{@code} is an XPath expression, this is evaluated and is assumed 168 * to identify a single element. 169 * @param element 170 * @param xpathExpression 171 * @return A single element or null if no match or the 1st match if matches more than 1 172 * @throws XPathExpressionException 173 */ 174 public static Element selectSingleElement(Element element, String xpathExpression) throws XPathExpressionException { 175 if (element == null) { 176 return null; 177 } 178 if (xpathExpression.indexOf("/") == -1) { 179 NodeList nodeList = element.getChildNodes(); 180 for (int i = 0; i < nodeList.getLength(); i++) { 181 Node node = nodeList.item(i); 182 if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) { 183 return (Element) node; 184 } 185 } 186 // NodeList nodes = element.getElementsByTagName(xpathExpression); 187 // if (nodes.getLength() > 0) { 188 // return (Element) nodes.item(0); 189 // } else { 190 return null; 191 // } 192 } else { 193 XPath xpath = XPathFactory.newInstance().newXPath(); 194 Element node = (Element) xpath.evaluate(xpathExpression, element, XPathConstants.NODE); 195 return node; 196 } 197 } 198 199 /** 200 * Gets a list of elements matching {@code}xpathExpression{@code}. If xpathExpression lacks 201 * a '/' character, only immediate children o {@code}element{@code} are searched over. 202 * <br/> 203 * If {@code}xpathExpression{@code} contains an '/' character, a full XPath search is made 204 * @param element 205 * @param xpathExpression 206 * @return A possibly empty but non-null {@code}ArrayList{@code} 207 * @throws XPathExpressionException 208 */ 209 public static ArrayList<Element> selectElements(Element element, String xpathExpression) throws XPathExpressionException { 210 ArrayList<Element> resultVector = new ArrayList<Element>(); 211 if (element == null) { 212 return resultVector; 213 } 214 if (xpathExpression.indexOf("/") == -1) { 215 NodeList nodeList = element.getChildNodes(); 216 for (int i = 0; i < nodeList.getLength(); i++) { 217 Node node = nodeList.item(i); 218 if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(xpathExpression)) { 219 resultVector.add((Element) node); 220 } 221 } 222 } else { 223 XPath xpath = XPathFactory.newInstance().newXPath(); 224 NodeList nodes = (NodeList) xpath.evaluate(xpathExpression, element, XPathConstants.NODESET); 225 226 227 for (int i = 0; i < nodes.getLength(); i++) { 228 Node node = nodes.item(i); 229 resultVector.add((Element) node); 230 } 231 } 232 return resultVector; 233 } 234}