001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.xml;
022
023import java.io.IOException;
024import java.io.StringReader;
025import java.util.ArrayList;
026import java.util.List;
027
028import javax.vecmath.Matrix4d;
029import javax.xml.parsers.DocumentBuilder;
030import javax.xml.parsers.DocumentBuilderFactory;
031import javax.xml.parsers.ParserConfigurationException;
032
033import org.biojava.nbio.structure.StructureIdentifier;
034import org.biojava.nbio.structure.align.client.StructureName;
035import org.biojava.nbio.structure.align.multiple.Block;
036import org.biojava.nbio.structure.align.multiple.BlockImpl;
037import org.biojava.nbio.structure.align.multiple.BlockSet;
038import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
039import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
040import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
041import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
042import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
043import org.biojava.nbio.structure.align.multiple.ScoresCache;
044import org.w3c.dom.Document;
045import org.w3c.dom.NamedNodeMap;
046import org.w3c.dom.Node;
047import org.w3c.dom.NodeList;
048import org.xml.sax.InputSource;
049import org.xml.sax.SAXException;
050
051/**
052 * Parse an XML file representing a {@link MultipleAlignmentEnsemble}, so
053 * that the original alignment can be recovered.
054 * <p>
055 * Atoms need to be downloaded, either manually or using the method
056 * getAtomArrays() in MultipleAlignmentEnsemble.
057 *
058 * @author Aleix Lafita
059 * @since 4.1.1
060 *
061 */
062public class MultipleAlignmentXMLParser {
063
064        /**
065         * Creates a list of MultipleAlignment ensembles from an XML file.
066         * This recovers only the information that was previously stored.
067         * If the Atoms are needed, the method getAtomArrays() will automatically
068         * download the structures from the stored structure identifiers.
069         *
070         * @param xml String XML file containing any number of ensembles
071         * @return List of ensembles in the file
072         * @throws ParserConfigurationException
073         * @throws SAXException
074         * @throws IOException
075         */
076        public static List<MultipleAlignmentEnsemble> parseXMLfile(String xml)
077                        throws ParserConfigurationException, SAXException, IOException {
078
079                List<MultipleAlignmentEnsemble> ensembles =
080                                new ArrayList<>();
081
082                //Convert string to XML document
083                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
084                DocumentBuilder db = factory.newDocumentBuilder();
085                InputSource inStream = new InputSource();
086                inStream.setCharacterStream(new StringReader(xml));
087                Document doc = db.parse(inStream);
088                doc.getDocumentElement().normalize();
089
090                //In case there are more than one ensemble in the document (generalize)
091                NodeList listOfEnsembles =
092                                doc.getElementsByTagName("MultipleAlignmentEnsemble");
093
094                //Explore all the ensembles, if multiple ones
095                for (int e=0; e<listOfEnsembles.getLength(); e++) {
096
097                        Node root = listOfEnsembles.item(e);
098                        MultipleAlignmentEnsemble ensemble = parseEnsemble(root);
099                        ensembles.add(ensemble);
100                }
101                return ensembles;
102        }
103
104        private static MultipleAlignmentEnsemble parseEnsemble(Node root){
105
106                MultipleAlignmentEnsemble ensemble =
107                                new MultipleAlignmentEnsembleImpl();
108
109                parseHeader(root, ensemble);
110
111                NodeList children = root.getChildNodes();
112
113                for (int i=0; i<children.getLength(); i++) {
114
115                        Node child = children.item(i);
116                        if ("MultipleAlignment".equals(child.getNodeName())){
117                                parseMultipleAlignment(child, ensemble);
118                        }
119                        else if ("Structures".equals(child.getNodeName())){
120                                parseStructures(child, ensemble);
121                        }
122                        else if ("ScoresCache".equals(child.getNodeName())){
123                                parseScoresCache(child, ensemble);
124                        }
125                }
126
127                return ensemble;
128        }
129
130        private static MultipleAlignment parseMultipleAlignment(Node root,
131                        MultipleAlignmentEnsemble ensemble) {
132
133                MultipleAlignment msa = new MultipleAlignmentImpl(ensemble);
134                NodeList children = root.getChildNodes();
135
136                for (int i=0; i<children.getLength(); i++) {
137
138                        Node child = children.item(i);
139
140                        if ("BlockSet".equals(child.getNodeName())){
141                                parseBlockSet(child, msa);
142                        }
143                        else if ("ScoresCache".equals(child.getNodeName())){
144                                parseScoresCache(child, msa);
145                        }
146                }
147                return msa;
148        }
149
150        private static BlockSet parseBlockSet(Node root, MultipleAlignment msa) {
151
152                BlockSet bs = new BlockSetImpl(msa);
153                List<Matrix4d> transforms = new ArrayList<Matrix4d>();
154                NodeList children = root.getChildNodes();
155
156                for (int i=0; i<children.getLength(); i++) {
157
158                        Node child = children.item(i);
159
160                        if ("Block".equals(child.getNodeName())){
161                                parseBlock(child, bs);
162                        }
163                        else if ("Matrix4d".equals(child.getNodeName())){
164                                Matrix4d t = parseMatrix4d(child);
165                                transforms.add(t);
166                        }
167                        else if ("ScoresCache".equals(child.getNodeName())){
168                                parseScoresCache(child, bs);
169                        }
170                }
171                //Because if it is 0 means that there were no transformations
172                if (transforms.size() != 0){
173                        bs.setTransformations(transforms);
174                }
175                return bs;
176        }
177
178        private static Block parseBlock(Node root, BlockSet blockSet) {
179
180                Block b = new BlockImpl(blockSet);
181                List<List<Integer>> alignRes = new ArrayList<>();
182                b.setAlignRes(alignRes);
183                NodeList children = root.getChildNodes();
184
185                for(int i=0; i<children.getLength(); i++) {
186
187                        Node child = children.item(i);
188                        if (child.getNodeName().contains("eqr")){
189
190                                NamedNodeMap atts = child.getAttributes();
191
192                                int str = 1;
193                                Node node = atts.getNamedItem("str"+str);
194
195                                while (node!=null){
196
197                                        if (alignRes.size() < str) {
198                                                alignRes.add(new ArrayList<Integer>());
199                                        }
200
201                                        String residue = node.getTextContent();
202                                        if ("null".equals(residue)){
203                                                alignRes.get(str-1).add(null);
204                                        } else {
205                                                alignRes.get(str-1).add(Integer.valueOf(residue));
206                                        }
207
208                                        str++;
209                                        node = atts.getNamedItem("str"+str);
210                                }
211                        }
212                        else if ("ScoresCache".equals(child.getNodeName())){
213                                parseScoresCache(child, b);
214                        }
215                }
216                return b;
217        }
218
219        private static Matrix4d parseMatrix4d(Node node) {
220
221                Matrix4d m = new Matrix4d();
222                NamedNodeMap atts = node.getAttributes();
223
224                for (int x=0; x<4; x++){
225                        for (int y=0; y<4; y++){
226                                String key = "mat"+(x+1)+(y+1);
227                                String value = atts.getNamedItem(key).getTextContent();
228                                m.setElement(x, y, Double.valueOf(value));
229                        }
230                }
231                return m;
232        }
233
234        private static void parseScoresCache(Node root, ScoresCache cache) {
235
236                NodeList children = root.getChildNodes();
237
238                for (int i=0; i<children.getLength(); i++) {
239
240                        Node child = children.item(i);
241                        NamedNodeMap atts = child.getAttributes();
242                        if (atts != null) {
243                                Node score = atts.getNamedItem("value");
244                                Double value = Double.valueOf(score.getTextContent());
245                                cache.putScore(child.getNodeName(), value);
246                        }
247                }
248        }
249
250        private static void parseHeader(Node node,
251                        MultipleAlignmentEnsemble ensemble) {
252
253                NamedNodeMap atts = node.getAttributes();
254
255                String algo = atts.getNamedItem("Algorithm").getTextContent();
256                if (!"null".equals(algo)){
257                        ensemble.setAlgorithmName(algo);
258                }
259
260                String version = atts.getNamedItem("Version").getTextContent();
261                if (!"null".equals(version)){
262                        ensemble.setVersion(version);
263                }
264
265                String ioTime = atts.getNamedItem("IOTime").getTextContent();
266                if (!"null".equals(ioTime)){
267                        ensemble.setIoTime(Long.valueOf(ioTime));
268                }
269
270                String time = atts.getNamedItem("CalculationTime").getTextContent();
271                if (!"null".equals(time)){
272                        ensemble.setCalculationTime(Long.valueOf(time));
273                }
274        }
275
276        private static void parseStructures(Node root,
277                        MultipleAlignmentEnsemble ensemble) {
278
279                List<StructureIdentifier> names = new ArrayList<>();
280                ensemble.setStructureIdentifiers(names);
281
282                NamedNodeMap atts = root.getAttributes();
283
284                int str = 1;
285                Node node = atts.getNamedItem("name"+str);
286
287                while (node!=null){
288
289                        String name = node.getTextContent();
290                        names.add(new StructureName(name));
291
292                        str++;
293                        node = atts.getNamedItem("name"+str);
294                }
295        }
296
297}