001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 1, 2010
021 * Author: Jianjiong Gao
022 *
023 */
024
025package org.biojava.nbio.protmod.io;
026
027import org.biojava.nbio.protmod.*;
028import org.w3c.dom.Document;
029import org.w3c.dom.NamedNodeMap;
030import org.w3c.dom.Node;
031import org.w3c.dom.NodeList;
032import org.xml.sax.SAXException;
033
034import javax.xml.parsers.DocumentBuilder;
035import javax.xml.parsers.DocumentBuilderFactory;
036import javax.xml.parsers.ParserConfigurationException;
037import java.io.IOException;
038import java.io.InputStream;
039import java.util.*;
040
041/**
042 *
043 * @author Jianjiong Gao
044 * @since 3.0
045 */
046public final class ProteinModificationXmlReader {
047        /**
048         * This is a utility class and thus cannot be instantialized.
049         */
050        private ProteinModificationXmlReader() {}
051
052        /**
053         * Read protein modifications from XML file and register them.
054         * @param isXml {@link InputStream} of the XML file.
055         * @throws IOException if failed to read the XML file.
056         * @throws ParserConfigurationException if parse errors occur.
057         * @throws SAXException the {@link DocumentBuilder} cannot be created.
058         */
059        public static void registerProteinModificationFromXml(InputStream isXml)
060                        throws IOException, ParserConfigurationException, SAXException {
061                if (isXml==null) {
062                        throw new IllegalArgumentException("Null argument.");
063                }
064
065                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
066                DocumentBuilder builder = factory.newDocumentBuilder();
067                Document doc = builder.parse(isXml);
068
069                NodeList modNodes = doc.getElementsByTagName("Entry");
070                int modSize = modNodes.getLength();
071                List<Node> nodes;
072                for (int iMod=0; iMod<modSize; iMod++) {
073                        Node modNode = modNodes.item(iMod);
074                        Map<String,List<Node>> infoNodes = getChildNodes(modNode);
075
076                        // ID
077                        nodes = infoNodes.get("Id");
078                        if (nodes==null || nodes.size()!=1) {
079                                throw new RuntimeException("Each modification must have exact " +
080                                                "one <Id> field.");
081                        }
082                        String id = nodes.get(0).getTextContent();
083
084                        // modification category
085                        nodes = infoNodes.get("Category");
086                        if (nodes==null || nodes.size()!=1) {
087                                throw new RuntimeException("Each modification must have exact " +
088                                                "one <Category> field. See Modification "+id+".");
089                        }
090                        ModificationCategory cat = ModificationCategory.getByLabel(
091                                        nodes.get(0).getTextContent());
092                        if (cat==null) {
093                                throw new RuntimeException(nodes.get(0).getTextContent()+
094                                        " is not defined as an modification category." +
095                                        " See Modification "+id+".");
096                        }
097
098                        // occurrence type
099                        nodes = infoNodes.get("Occurrence");
100                        if (nodes==null || nodes.size()!=1) {
101                                throw new RuntimeException("Each modification must have exact " +
102                                                "one <Occurrence> field. See Modification "+id+".");
103                        }
104                        ModificationOccurrenceType occType = ModificationOccurrenceType
105                                .getByLabel(nodes.get(0).getTextContent());
106                        if (occType==null) {
107                                throw new RuntimeException(nodes.get(0).getTextContent()+
108                                        " is not defined as an modification occurence type." +
109                                        " See Modification "+id+".");
110                        }
111
112                        // condition
113                        ModificationCondition condition = null;
114                        {
115                                nodes = infoNodes.get("Condition");
116                                if (nodes==null || nodes.size()!=1) {
117                                        throw new RuntimeException("Each modification must have exact " +
118                                                        "one <Condition> field. See Modification "+id+".");
119                                }
120
121                                Node compsNode = nodes.get(0);
122
123                                // keep track of the labels of component indices
124                                Map<String,Integer> mapLabelComp = new HashMap<>();
125
126                                Map<String,List<Node>> compInfoNodes = getChildNodes(compsNode);
127
128                                // components
129                                List<Node> compNodes = compInfoNodes.get("Component");
130                                int sizeComp = compNodes.size();
131                                List<Component> comps = new ArrayList<>(sizeComp);
132                                for (int iComp=0; iComp<sizeComp; iComp++) {
133                                        Node compNode = compNodes.get(iComp);
134                                        // comp label
135                                        NamedNodeMap compNodeAttrs = compNode.getAttributes();
136                                        Node labelNode = compNodeAttrs.getNamedItem("component");
137                                        if (labelNode==null) {
138                                                throw new RuntimeException("Each component must have a label." +
139                                                                " See Modification "+id+".");
140                                        }
141                                        String label = labelNode.getTextContent();
142
143                                        if (mapLabelComp.containsKey(label)) {
144                                                throw new RuntimeException("Each component must have a unique label." +
145                                                                " See Modification "+id+".");
146                                        }
147
148                                        // comp PDBCC ID
149                                        Set<String> compIds = new HashSet<>();
150                                        List<Node> compIdNodes = getChildNodes(compNode).get("Id");
151                                        if (compIdNodes!=null) {
152                                                for (Node compIdNode : compIdNodes) {
153                                                        NamedNodeMap compIdNodeAttr = compIdNode.getAttributes();
154                                                        Node compIdSource = compIdNodeAttr.getNamedItem("source");
155                                                        if (compIdSource!=null && "PDBCC".equals(compIdSource.getTextContent())) {
156                                                                String strComps = compIdNode.getTextContent();
157                                                                if (strComps.isEmpty()) {
158                                                                        throw new RuntimeException("Empty component." +
159                                                                                        " See Modification "+id+".");
160                                                                }
161                                                                compIds.addAll(Arrays.asList(strComps.split(",")));
162                                                        }
163                                                }
164                                        }
165
166                                        if (compIds.isEmpty()) {
167                                                throw new RuntimeException("Each component must have a PDBCC ID." +
168                                                                " See Modification "+id+".");
169                                        }
170
171                                        // terminal
172                                        boolean nTerminal = false;
173                                        boolean cTerminal = false;
174                                        List<Node> compTermNode = getChildNodes(compNode).get("Terminal");
175                                        if (compTermNode!=null) {
176                                                if (compTermNode.size()!=1) {
177                                                        throw new RuntimeException("Only one <Terminal> condition is allowed for " +
178                                                                        "each component. See Modification "+id+".");
179                                                }
180                                                String nc = compTermNode.get(0).getTextContent();
181                                                if ("N".equals(nc)) {
182                                                        nTerminal = true;
183                                                } else if ("C".equals(nc)) {
184                                                        cTerminal = true;
185                                                } else {
186                                                        throw new RuntimeException("Only N or C is allowed for <Terminal>." +
187                                                                        " See Modification "+id+".");
188                                                }
189                                        }
190
191                                        // register
192                                        Component comp = Component.of(compIds, nTerminal, cTerminal);
193                                        comps.add(comp);
194                                        mapLabelComp.put(label, comps.size()-1);
195                                }
196
197                                // bonds
198                                List<Node> bondNodes = compInfoNodes.get("Bond");
199                                List<ModificationLinkage> linkages = null;
200                                if (bondNodes!=null) {
201                                        int sizeBonds = bondNodes.size();
202                                        linkages = new ArrayList<>(sizeBonds);
203                                        for (int iBond=0; iBond<sizeBonds; iBond++) {
204                                                Node bondNode = bondNodes.get(iBond);
205                                                Map<String,List<Node>> bondChildNodes = getChildNodes(bondNode);
206                                                if (bondChildNodes==null) {
207                                                        throw new RuntimeException("Each bond must contain two atoms" +
208                                                                        " See Modification "+id+".");
209                                                }
210
211                                                List<Node> atomNodes = bondChildNodes.get("Atom");
212                                                if (atomNodes==null || atomNodes.size()!=2) {
213                                                        throw new RuntimeException("Each bond must contain two atoms" +
214                                                                        " See Modification "+id+".");
215                                                }
216
217                                                // atom 1
218                                                NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes();
219                                                Node compNode = atomNodeAttrs.getNamedItem("component");
220                                                if (compNode==null) {
221                                                        throw new RuntimeException("Each atom must on a component." +
222                                                                        " See Modification "+id+".");
223                                                }
224                                                String labelComp1 = compNode.getTextContent();
225                                                int iComp1 = mapLabelComp.get(labelComp1);
226
227                                                Node labelNode = atomNodeAttrs.getNamedItem("atom");
228                                                String labelAtom1 = labelNode==null?null:labelNode.getTextContent();
229
230                                                String atom1 = atomNodes.get(0).getTextContent();
231                                                if (atom1.isEmpty()) {
232                                                        throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
233                                                                        " See Modification "+id+".");
234                                                }
235                                                List<String> potentialAtoms1 = Arrays.asList(atom1.split(","));
236
237                                                // atom 2
238                                                atomNodeAttrs = atomNodes.get(1).getAttributes();
239                                                compNode = atomNodeAttrs.getNamedItem("component");
240                                                if (compNode==null) {
241                                                        throw new RuntimeException("Each atom must on a component." +
242                                                                        " See Modification "+id+".");
243                                                }
244                                                String labelComp2 = compNode.getTextContent();
245                                                int iComp2 = mapLabelComp.get(labelComp2);
246
247                                                labelNode = atomNodeAttrs.getNamedItem("atom");
248                                                String labelAtom2 = labelNode==null?null:labelNode.getTextContent();
249
250                                                String atom2 = atomNodes.get(1).getTextContent();
251                                                if (atom2.isEmpty()) {
252                                                        throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
253                                                                        " See Modification "+id+".");
254                                                }
255                                                List<String> potentialAtoms2 = Arrays.asList(atom2.split(","));
256
257                                                // add linkage
258                                                ModificationLinkage linkage = new ModificationLinkage(comps,
259                                                                iComp1, potentialAtoms1, labelAtom1,
260                                                                iComp2, potentialAtoms2, labelAtom2);
261                                                linkages.add(linkage);
262                                        }
263                                }
264
265                                condition = new ModificationConditionImpl(comps, linkages);
266                        } // end of condition
267
268                        ProteinModificationImpl.Builder modBuilder =
269                                new ProteinModificationImpl.Builder(id, cat, occType, condition);
270
271                        // description
272                        nodes = infoNodes.get("Description");
273                        if (nodes!=null && !nodes.isEmpty()) {
274                                modBuilder.setDescription(nodes.get(0).getTextContent());
275                        }
276
277                        // cross references
278                        nodes = infoNodes.get("CrossReference");
279                        if (nodes!=null) {
280                                for (Node node:nodes) {
281                                        Map<String,List<Node>> xrefInfoNodes = getChildNodes(node);
282
283                                        // source
284                                        List<Node> xrefNode = xrefInfoNodes.get("Source");
285                                        if (xrefNode==null || xrefNode.size()!=1) {
286                                                throw new RuntimeException("Error in XML file: " +
287                                                        "a cross reference must contain exactly one <Source> field." +
288                                                        " See Modification "+id+".");
289                                        }
290                                        String xrefDb = xrefNode.get(0).getTextContent();
291
292                                        // id
293                                        xrefNode = xrefInfoNodes.get("Id");
294                                        if (xrefNode==null || xrefNode.size()!=1) {
295                                                throw new RuntimeException("Error in XML file: " +
296                                                        "a cross reference must contain exactly one <Id> field." +
297                                                        " See Modification "+id+".");
298                                        }
299                                        String xrefId = xrefNode.get(0).getTextContent();
300
301                                        // name
302                                        String xrefName = null;
303                                        xrefNode = xrefInfoNodes.get("Name");
304                                        if (xrefNode!=null && !xrefNode.isEmpty()) {
305                                                xrefName = xrefNode.get(0).getTextContent();
306                                        }
307
308                                        if ("PDBCC".equals(xrefDb)) {
309                                                modBuilder.setPdbccId(xrefId).setPdbccName(xrefName);
310                                        } else if ("RESID".equals(xrefDb)) {
311                                                modBuilder.setResidId(xrefId).setResidName(xrefName);
312                                        } else if ("PSI-MOD".equals(xrefDb)) {
313                                                modBuilder.setPsimodId(xrefId).setPsimodName(xrefName);
314                                        }
315                                }
316                        } // end of cross references
317
318                        // formula
319                        nodes = infoNodes.get("Formula");
320                        if (nodes!=null && !nodes.isEmpty()) {
321                                modBuilder.setFormula(nodes.get(0).getTextContent());
322                        }
323
324                        // keywords
325                        nodes = infoNodes.get("Keyword");
326                        if (nodes!=null && !nodes.isEmpty()) {
327                                for (Node node : nodes) {
328                                        modBuilder.addKeyword(node.getTextContent());
329                                }
330                        }
331
332                        ProteinModificationRegistry.register(modBuilder.build());
333                }
334        }
335
336        /**
337         * Utility method to group child nodes by their names.
338         * @param parent parent node.
339         * @return Map from name to child nodes.
340         */
341        private static Map<String,List<Node>> getChildNodes(Node parent) {
342                if (parent==null)
343                        return Collections.emptyMap();
344
345                Map<String,List<Node>> children = new HashMap<>();
346
347                NodeList nodes = parent.getChildNodes();
348                int nNodes = nodes.getLength();
349                for (int i=0; i<nNodes; i++) {
350                        Node node = nodes.item(i);
351                        if (node.getNodeType()!=Node.ELEMENT_NODE)
352                                continue;
353
354                        String name = node.getNodeName();
355                        List<Node> namesakes = children.get(name);
356                        if (namesakes==null) {
357                                namesakes = new ArrayList<>();
358                                children.put(name, namesakes);
359                        }
360                        namesakes.add(node);
361                }
362
363                return children;
364        }
365}