001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.xml; 022 023import java.io.IOException; 024import java.io.StringReader; 025import java.util.ArrayList; 026import java.util.List; 027 028import javax.vecmath.Matrix4d; 029import javax.xml.parsers.DocumentBuilder; 030import javax.xml.parsers.DocumentBuilderFactory; 031import javax.xml.parsers.ParserConfigurationException; 032 033import org.biojava.nbio.structure.StructureIdentifier; 034import org.biojava.nbio.structure.align.client.StructureName; 035import org.biojava.nbio.structure.align.multiple.Block; 036import org.biojava.nbio.structure.align.multiple.BlockImpl; 037import org.biojava.nbio.structure.align.multiple.BlockSet; 038import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 039import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 040import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 041import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; 042import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 043import org.biojava.nbio.structure.align.multiple.ScoresCache; 044import org.w3c.dom.Document; 045import org.w3c.dom.NamedNodeMap; 046import org.w3c.dom.Node; 047import org.w3c.dom.NodeList; 048import org.xml.sax.InputSource; 049import org.xml.sax.SAXException; 050 051/** 052 * Parse an XML file representing a {@link MultipleAlignmentEnsemble}, so 053 * that the original alignment can be recovered. 054 * <p> 055 * Atoms need to be downloaded, either manually or using the method 056 * getAtomArrays() in MultipleAlignmentEnsemble. 057 * 058 * @author Aleix Lafita 059 * @since 4.1.1 060 * 061 */ 062public class MultipleAlignmentXMLParser { 063 064 /** 065 * Creates a list of MultipleAlignment ensembles from an XML file. 066 * This recovers only the information that was previously stored. 067 * If the Atoms are needed, the method getAtomArrays() will automatically 068 * download the structures from the stored structure identifiers. 069 * 070 * @param xml String XML file containing any number of ensembles 071 * @return List of ensembles in the file 072 * @throws ParserConfigurationException 073 * @throws SAXException 074 * @throws IOException 075 */ 076 public static List<MultipleAlignmentEnsemble> parseXMLfile(String xml) 077 throws ParserConfigurationException, SAXException, IOException { 078 079 List<MultipleAlignmentEnsemble> ensembles = 080 new ArrayList<>(); 081 082 //Convert string to XML document 083 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 084 DocumentBuilder db = factory.newDocumentBuilder(); 085 InputSource inStream = new InputSource(); 086 inStream.setCharacterStream(new StringReader(xml)); 087 Document doc = db.parse(inStream); 088 doc.getDocumentElement().normalize(); 089 090 //In case there are more than one ensemble in the document (generalize) 091 NodeList listOfEnsembles = 092 doc.getElementsByTagName("MultipleAlignmentEnsemble"); 093 094 //Explore all the ensembles, if multiple ones 095 for (int e=0; e<listOfEnsembles.getLength(); e++) { 096 097 Node root = listOfEnsembles.item(e); 098 MultipleAlignmentEnsemble ensemble = parseEnsemble(root); 099 ensembles.add(ensemble); 100 } 101 return ensembles; 102 } 103 104 private static MultipleAlignmentEnsemble parseEnsemble(Node root){ 105 106 MultipleAlignmentEnsemble ensemble = 107 new MultipleAlignmentEnsembleImpl(); 108 109 parseHeader(root, ensemble); 110 111 NodeList children = root.getChildNodes(); 112 113 for (int i=0; i<children.getLength(); i++) { 114 115 Node child = children.item(i); 116 if ("MultipleAlignment".equals(child.getNodeName())){ 117 parseMultipleAlignment(child, ensemble); 118 } 119 else if ("Structures".equals(child.getNodeName())){ 120 parseStructures(child, ensemble); 121 } 122 else if ("ScoresCache".equals(child.getNodeName())){ 123 parseScoresCache(child, ensemble); 124 } 125 } 126 127 return ensemble; 128 } 129 130 private static MultipleAlignment parseMultipleAlignment(Node root, 131 MultipleAlignmentEnsemble ensemble) { 132 133 MultipleAlignment msa = new MultipleAlignmentImpl(ensemble); 134 NodeList children = root.getChildNodes(); 135 136 for (int i=0; i<children.getLength(); i++) { 137 138 Node child = children.item(i); 139 140 if ("BlockSet".equals(child.getNodeName())){ 141 parseBlockSet(child, msa); 142 } 143 else if ("ScoresCache".equals(child.getNodeName())){ 144 parseScoresCache(child, msa); 145 } 146 } 147 return msa; 148 } 149 150 private static BlockSet parseBlockSet(Node root, MultipleAlignment msa) { 151 152 BlockSet bs = new BlockSetImpl(msa); 153 List<Matrix4d> transforms = new ArrayList<Matrix4d>(); 154 NodeList children = root.getChildNodes(); 155 156 for (int i=0; i<children.getLength(); i++) { 157 158 Node child = children.item(i); 159 160 if ("Block".equals(child.getNodeName())){ 161 parseBlock(child, bs); 162 } 163 else if ("Matrix4d".equals(child.getNodeName())){ 164 Matrix4d t = parseMatrix4d(child); 165 transforms.add(t); 166 } 167 else if ("ScoresCache".equals(child.getNodeName())){ 168 parseScoresCache(child, bs); 169 } 170 } 171 //Because if it is 0 means that there were no transformations 172 if (transforms.size() != 0){ 173 bs.setTransformations(transforms); 174 } 175 return bs; 176 } 177 178 private static Block parseBlock(Node root, BlockSet blockSet) { 179 180 Block b = new BlockImpl(blockSet); 181 List<List<Integer>> alignRes = new ArrayList<>(); 182 b.setAlignRes(alignRes); 183 NodeList children = root.getChildNodes(); 184 185 for(int i=0; i<children.getLength(); i++) { 186 187 Node child = children.item(i); 188 if (child.getNodeName().contains("eqr")){ 189 190 NamedNodeMap atts = child.getAttributes(); 191 192 int str = 1; 193 Node node = atts.getNamedItem("str"+str); 194 195 while (node!=null){ 196 197 if (alignRes.size() < str) { 198 alignRes.add(new ArrayList<Integer>()); 199 } 200 201 String residue = node.getTextContent(); 202 if ("null".equals(residue)){ 203 alignRes.get(str-1).add(null); 204 } else { 205 alignRes.get(str-1).add(Integer.valueOf(residue)); 206 } 207 208 str++; 209 node = atts.getNamedItem("str"+str); 210 } 211 } 212 else if ("ScoresCache".equals(child.getNodeName())){ 213 parseScoresCache(child, b); 214 } 215 } 216 return b; 217 } 218 219 private static Matrix4d parseMatrix4d(Node node) { 220 221 Matrix4d m = new Matrix4d(); 222 NamedNodeMap atts = node.getAttributes(); 223 224 for (int x=0; x<4; x++){ 225 for (int y=0; y<4; y++){ 226 String key = "mat"+(x+1)+(y+1); 227 String value = atts.getNamedItem(key).getTextContent(); 228 m.setElement(x, y, Double.valueOf(value)); 229 } 230 } 231 return m; 232 } 233 234 private static void parseScoresCache(Node root, ScoresCache cache) { 235 236 NodeList children = root.getChildNodes(); 237 238 for (int i=0; i<children.getLength(); i++) { 239 240 Node child = children.item(i); 241 NamedNodeMap atts = child.getAttributes(); 242 if (atts != null) { 243 Node score = atts.getNamedItem("value"); 244 Double value = Double.valueOf(score.getTextContent()); 245 cache.putScore(child.getNodeName(), value); 246 } 247 } 248 } 249 250 private static void parseHeader(Node node, 251 MultipleAlignmentEnsemble ensemble) { 252 253 NamedNodeMap atts = node.getAttributes(); 254 255 String algo = atts.getNamedItem("Algorithm").getTextContent(); 256 if (!"null".equals(algo)){ 257 ensemble.setAlgorithmName(algo); 258 } 259 260 String version = atts.getNamedItem("Version").getTextContent(); 261 if (!"null".equals(version)){ 262 ensemble.setVersion(version); 263 } 264 265 String ioTime = atts.getNamedItem("IOTime").getTextContent(); 266 if (!"null".equals(ioTime)){ 267 ensemble.setIoTime(Long.valueOf(ioTime)); 268 } 269 270 String time = atts.getNamedItem("CalculationTime").getTextContent(); 271 if (!"null".equals(time)){ 272 ensemble.setCalculationTime(Long.valueOf(time)); 273 } 274 } 275 276 private static void parseStructures(Node root, 277 MultipleAlignmentEnsemble ensemble) { 278 279 List<StructureIdentifier> names = new ArrayList<>(); 280 ensemble.setStructureIdentifiers(names); 281 282 NamedNodeMap atts = root.getAttributes(); 283 284 int str = 1; 285 Node node = atts.getNamedItem("name"+str); 286 287 while (node!=null){ 288 289 String name = node.getTextContent(); 290 names.add(new StructureName(name)); 291 292 str++; 293 node = atts.getNamedItem("name"+str); 294 } 295 } 296 297}