001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jun 1, 2010 021 * Author: Jianjiong Gao 022 * 023 */ 024 025package org.biojava.nbio.protmod.io; 026 027import org.biojava.nbio.protmod.*; 028import org.w3c.dom.Document; 029import org.w3c.dom.NamedNodeMap; 030import org.w3c.dom.Node; 031import org.w3c.dom.NodeList; 032import org.xml.sax.SAXException; 033 034import javax.xml.parsers.DocumentBuilder; 035import javax.xml.parsers.DocumentBuilderFactory; 036import javax.xml.parsers.ParserConfigurationException; 037import java.io.IOException; 038import java.io.InputStream; 039import java.util.*; 040 041/** 042 * 043 * @author Jianjiong Gao 044 * @since 3.0 045 */ 046public final class ProteinModificationXmlReader { 047 /** 048 * This is a utility class and thus cannot be instantialized. 049 */ 050 private ProteinModificationXmlReader() {} 051 052 /** 053 * Read protein modifications from XML file and register them. 054 * @param isXml {@link InputStream} of the XML file. 055 * @throws IOException if failed to read the XML file. 056 * @throws ParserConfigurationException if parse errors occur. 057 * @throws SAXException the {@link DocumentBuilder} cannot be created. 058 */ 059 public static void registerProteinModificationFromXml(InputStream isXml) 060 throws IOException, ParserConfigurationException, SAXException { 061 if (isXml==null) { 062 throw new IllegalArgumentException("Null argument."); 063 } 064 065 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 066 DocumentBuilder builder = factory.newDocumentBuilder(); 067 Document doc = builder.parse(isXml); 068 069 NodeList modNodes = doc.getElementsByTagName("Entry"); 070 int modSize = modNodes.getLength(); 071 List<Node> nodes; 072 for (int iMod=0; iMod<modSize; iMod++) { 073 Node modNode = modNodes.item(iMod); 074 Map<String,List<Node>> infoNodes = getChildNodes(modNode); 075 076 // ID 077 nodes = infoNodes.get("Id"); 078 if (nodes==null || nodes.size()!=1) { 079 throw new RuntimeException("Each modification must have exact " + 080 "one <Id> field."); 081 } 082 String id = nodes.get(0).getTextContent(); 083 084 // modification category 085 nodes = infoNodes.get("Category"); 086 if (nodes==null || nodes.size()!=1) { 087 throw new RuntimeException("Each modification must have exact " + 088 "one <Category> field. See Modification "+id+"."); 089 } 090 ModificationCategory cat = ModificationCategory.getByLabel( 091 nodes.get(0).getTextContent()); 092 if (cat==null) { 093 throw new RuntimeException(nodes.get(0).getTextContent()+ 094 " is not defined as an modification category." + 095 " See Modification "+id+"."); 096 } 097 098 // occurrence type 099 nodes = infoNodes.get("Occurrence"); 100 if (nodes==null || nodes.size()!=1) { 101 throw new RuntimeException("Each modification must have exact " + 102 "one <Occurrence> field. See Modification "+id+"."); 103 } 104 ModificationOccurrenceType occType = ModificationOccurrenceType 105 .getByLabel(nodes.get(0).getTextContent()); 106 if (occType==null) { 107 throw new RuntimeException(nodes.get(0).getTextContent()+ 108 " is not defined as an modification occurence type." + 109 " See Modification "+id+"."); 110 } 111 112 // condition 113 ModificationCondition condition = null; 114 { 115 nodes = infoNodes.get("Condition"); 116 if (nodes==null || nodes.size()!=1) { 117 throw new RuntimeException("Each modification must have exact " + 118 "one <Condition> field. See Modification "+id+"."); 119 } 120 121 Node compsNode = nodes.get(0); 122 123 // keep track of the labels of component indices 124 Map<String,Integer> mapLabelComp = new HashMap<>(); 125 126 Map<String,List<Node>> compInfoNodes = getChildNodes(compsNode); 127 128 // components 129 List<Node> compNodes = compInfoNodes.get("Component"); 130 int sizeComp = compNodes.size(); 131 List<Component> comps = new ArrayList<>(sizeComp); 132 for (int iComp=0; iComp<sizeComp; iComp++) { 133 Node compNode = compNodes.get(iComp); 134 // comp label 135 NamedNodeMap compNodeAttrs = compNode.getAttributes(); 136 Node labelNode = compNodeAttrs.getNamedItem("component"); 137 if (labelNode==null) { 138 throw new RuntimeException("Each component must have a label." + 139 " See Modification "+id+"."); 140 } 141 String label = labelNode.getTextContent(); 142 143 if (mapLabelComp.containsKey(label)) { 144 throw new RuntimeException("Each component must have a unique label." + 145 " See Modification "+id+"."); 146 } 147 148 // comp PDBCC ID 149 Set<String> compIds = new HashSet<>(); 150 List<Node> compIdNodes = getChildNodes(compNode).get("Id"); 151 if (compIdNodes!=null) { 152 for (Node compIdNode : compIdNodes) { 153 NamedNodeMap compIdNodeAttr = compIdNode.getAttributes(); 154 Node compIdSource = compIdNodeAttr.getNamedItem("source"); 155 if (compIdSource!=null && "PDBCC".equals(compIdSource.getTextContent())) { 156 String strComps = compIdNode.getTextContent(); 157 if (strComps.isEmpty()) { 158 throw new RuntimeException("Empty component." + 159 " See Modification "+id+"."); 160 } 161 compIds.addAll(Arrays.asList(strComps.split(","))); 162 } 163 } 164 } 165 166 if (compIds.isEmpty()) { 167 throw new RuntimeException("Each component must have a PDBCC ID." + 168 " See Modification "+id+"."); 169 } 170 171 // terminal 172 boolean nTerminal = false; 173 boolean cTerminal = false; 174 List<Node> compTermNode = getChildNodes(compNode).get("Terminal"); 175 if (compTermNode!=null) { 176 if (compTermNode.size()!=1) { 177 throw new RuntimeException("Only one <Terminal> condition is allowed for " + 178 "each component. See Modification "+id+"."); 179 } 180 String nc = compTermNode.get(0).getTextContent(); 181 if ("N".equals(nc)) { 182 nTerminal = true; 183 } else if ("C".equals(nc)) { 184 cTerminal = true; 185 } else { 186 throw new RuntimeException("Only N or C is allowed for <Terminal>." + 187 " See Modification "+id+"."); 188 } 189 } 190 191 // register 192 Component comp = Component.of(compIds, nTerminal, cTerminal); 193 comps.add(comp); 194 mapLabelComp.put(label, comps.size()-1); 195 } 196 197 // bonds 198 List<Node> bondNodes = compInfoNodes.get("Bond"); 199 List<ModificationLinkage> linkages = null; 200 if (bondNodes!=null) { 201 int sizeBonds = bondNodes.size(); 202 linkages = new ArrayList<>(sizeBonds); 203 for (int iBond=0; iBond<sizeBonds; iBond++) { 204 Node bondNode = bondNodes.get(iBond); 205 Map<String,List<Node>> bondChildNodes = getChildNodes(bondNode); 206 if (bondChildNodes==null) { 207 throw new RuntimeException("Each bond must contain two atoms" + 208 " See Modification "+id+"."); 209 } 210 211 List<Node> atomNodes = bondChildNodes.get("Atom"); 212 if (atomNodes==null || atomNodes.size()!=2) { 213 throw new RuntimeException("Each bond must contain two atoms" + 214 " See Modification "+id+"."); 215 } 216 217 // atom 1 218 NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes(); 219 Node compNode = atomNodeAttrs.getNamedItem("component"); 220 if (compNode==null) { 221 throw new RuntimeException("Each atom must on a component." + 222 " See Modification "+id+"."); 223 } 224 String labelComp1 = compNode.getTextContent(); 225 int iComp1 = mapLabelComp.get(labelComp1); 226 227 Node labelNode = atomNodeAttrs.getNamedItem("atom"); 228 String labelAtom1 = labelNode==null?null:labelNode.getTextContent(); 229 230 String atom1 = atomNodes.get(0).getTextContent(); 231 if (atom1.isEmpty()) { 232 throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." + 233 " See Modification "+id+"."); 234 } 235 List<String> potentialAtoms1 = Arrays.asList(atom1.split(",")); 236 237 // atom 2 238 atomNodeAttrs = atomNodes.get(1).getAttributes(); 239 compNode = atomNodeAttrs.getNamedItem("component"); 240 if (compNode==null) { 241 throw new RuntimeException("Each atom must on a component." + 242 " See Modification "+id+"."); 243 } 244 String labelComp2 = compNode.getTextContent(); 245 int iComp2 = mapLabelComp.get(labelComp2); 246 247 labelNode = atomNodeAttrs.getNamedItem("atom"); 248 String labelAtom2 = labelNode==null?null:labelNode.getTextContent(); 249 250 String atom2 = atomNodes.get(1).getTextContent(); 251 if (atom2.isEmpty()) { 252 throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." + 253 " See Modification "+id+"."); 254 } 255 List<String> potentialAtoms2 = Arrays.asList(atom2.split(",")); 256 257 // add linkage 258 ModificationLinkage linkage = new ModificationLinkage(comps, 259 iComp1, potentialAtoms1, labelAtom1, 260 iComp2, potentialAtoms2, labelAtom2); 261 linkages.add(linkage); 262 } 263 } 264 265 condition = new ModificationConditionImpl(comps, linkages); 266 } // end of condition 267 268 ProteinModificationImpl.Builder modBuilder = 269 new ProteinModificationImpl.Builder(id, cat, occType, condition); 270 271 // description 272 nodes = infoNodes.get("Description"); 273 if (nodes!=null && !nodes.isEmpty()) { 274 modBuilder.setDescription(nodes.get(0).getTextContent()); 275 } 276 277 // cross references 278 nodes = infoNodes.get("CrossReference"); 279 if (nodes!=null) { 280 for (Node node:nodes) { 281 Map<String,List<Node>> xrefInfoNodes = getChildNodes(node); 282 283 // source 284 List<Node> xrefNode = xrefInfoNodes.get("Source"); 285 if (xrefNode==null || xrefNode.size()!=1) { 286 throw new RuntimeException("Error in XML file: " + 287 "a cross reference must contain exactly one <Source> field." + 288 " See Modification "+id+"."); 289 } 290 String xrefDb = xrefNode.get(0).getTextContent(); 291 292 // id 293 xrefNode = xrefInfoNodes.get("Id"); 294 if (xrefNode==null || xrefNode.size()!=1) { 295 throw new RuntimeException("Error in XML file: " + 296 "a cross reference must contain exactly one <Id> field." + 297 " See Modification "+id+"."); 298 } 299 String xrefId = xrefNode.get(0).getTextContent(); 300 301 // name 302 String xrefName = null; 303 xrefNode = xrefInfoNodes.get("Name"); 304 if (xrefNode!=null && !xrefNode.isEmpty()) { 305 xrefName = xrefNode.get(0).getTextContent(); 306 } 307 308 if ("PDBCC".equals(xrefDb)) { 309 modBuilder.setPdbccId(xrefId).setPdbccName(xrefName); 310 } else if ("RESID".equals(xrefDb)) { 311 modBuilder.setResidId(xrefId).setResidName(xrefName); 312 } else if ("PSI-MOD".equals(xrefDb)) { 313 modBuilder.setPsimodId(xrefId).setPsimodName(xrefName); 314 } 315 } 316 } // end of cross references 317 318 // formula 319 nodes = infoNodes.get("Formula"); 320 if (nodes!=null && !nodes.isEmpty()) { 321 modBuilder.setFormula(nodes.get(0).getTextContent()); 322 } 323 324 // keywords 325 nodes = infoNodes.get("Keyword"); 326 if (nodes!=null && !nodes.isEmpty()) { 327 for (Node node : nodes) { 328 modBuilder.addKeyword(node.getTextContent()); 329 } 330 } 331 332 ProteinModificationRegistry.register(modBuilder.build()); 333 } 334 } 335 336 /** 337 * Utility method to group child nodes by their names. 338 * @param parent parent node. 339 * @return Map from name to child nodes. 340 */ 341 private static Map<String,List<Node>> getChildNodes(Node parent) { 342 if (parent==null) 343 return Collections.emptyMap(); 344 345 Map<String,List<Node>> children = new HashMap<>(); 346 347 NodeList nodes = parent.getChildNodes(); 348 int nNodes = nodes.getLength(); 349 for (int i=0; i<nNodes; i++) { 350 Node node = nodes.item(i); 351 if (node.getNodeType()!=Node.ELEMENT_NODE) 352 continue; 353 354 String name = node.getNodeName(); 355 List<Node> namesakes = children.get(name); 356 if (namesakes==null) { 357 namesakes = new ArrayList<>(); 358 children.put(name, namesakes); 359 } 360 namesakes.add(node); 361 } 362 363 return children; 364 } 365}