/**
 * BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 * Created on Feb 22, 2012
 * Created by Andreas Prlic
 *
 * @since 3.0.2
 */
package org.biojava.nbio.structure.io.sifts;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * DOM-based parser for SIFTS XML documents.
 *
 * <p>The parser walks the {@code entity} / {@code segment} /
 * {@code listResidue} / {@code residue} element hierarchy and converts it
 * into {@link SiftsEntity}, {@link SiftsSegment} and {@link SiftsResidue}
 * objects. Results of the most recent call to
 * {@link #parseXmlFile(InputStream)} are available from
 * {@link #getEntities()}.
 */
public class SiftsXMLParser {

    /** Value of a {@code residueDetail} element that marks a residue as not observed. */
    private static final String NOT_OBSERVED = "Not_Observed";

    /** DOM of the most recently parsed document. */
    Document dom;

    /** Entities collected by the most recent parse. */
    List<SiftsEntity> entities;

    /** When {@code true}, each parsed segment is printed to standard out. */
    static boolean debug = false;

    public SiftsXMLParser() {
        entities = new ArrayList<SiftsEntity>();
    }

    /**
     * Returns the entities found by the most recent call to
     * {@link #parseXmlFile(InputStream)}.
     *
     * @return the list of parsed entities; empty if nothing has been parsed
     *         or the last parse failed before any entity was read
     */
    public List<SiftsEntity> getEntities() {
        return entities;
    }

    /**
     * Parses a SIFTS XML document from the given stream and replaces the
     * current entity list with its content.
     *
     * <p>Parser configuration, SAX and I/O errors are not propagated: their
     * stack trace is printed to standard error and the entity list is left
     * empty. The stream is not closed by this method.
     *
     * @param is stream providing the XML document
     */
    public void parseXmlFile(InputStream is) {
        entities = new ArrayList<SiftsEntity>();

        try {
            DocumentBuilder db = newHardenedFactory().newDocumentBuilder();

            // Build the DOM representation of the XML file, then walk it.
            dom = db.parse(is);

            parseDocument();

        } catch (ParserConfigurationException pce) {
            pce.printStackTrace();
        } catch (SAXException se) {
            se.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Creates a document builder factory with external entity resolution
     * switched off, so that a crafted document cannot make the parser read
     * local files or contact remote hosts (XXE).
     */
    private static DocumentBuilderFactory newHardenedFactory() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        trySetFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        trySetFeature(dbf, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(dbf, "http://xml.org/sax/features/external-parameter-entities", false);
        trySetFeature(dbf, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return dbf;
    }

    /** Sets a parser feature if the underlying implementation supports it. */
    private static void trySetFeature(DocumentBuilderFactory dbf, String feature, boolean value) {
        try {
            dbf.setFeature(feature, value);
        } catch (ParserConfigurationException ignored) {
            // Hardening is best effort: an implementation that does not know
            // this feature must still be able to parse regular SIFTS files.
        }
    }

    /** Converts every {@code entity} element of the parsed document into a {@link SiftsEntity}. */
    private void parseDocument() {
        Element root = dom.getDocumentElement();

        NodeList entityNodes = root.getElementsByTagName("entity");
        for (int i = 0; i < entityNodes.getLength(); i++) {
            Element entityEl = (Element) entityNodes.item(i);
            entities.add(getSiftsEntity(entityEl));
        }
    }

    /**
     * Builds a {@link SiftsEntity} from an element such as
     * {@code <entity type="protein" entityId="A">}, including all of its
     * {@code segment} descendants.
     *
     * @param empEl the {@code entity} element
     * @return the entity with its segments attached
     */
    private SiftsEntity getSiftsEntity(Element empEl) {
        String type = empEl.getAttribute("type");
        String entityId = empEl.getAttribute("entityId");

        SiftsEntity entity = new SiftsEntity(type, entityId);

        NodeList segmentNodes = empEl.getElementsByTagName("segment");
        for (int i = 0; i < segmentNodes.getLength(); i++) {
            Element segmentEl = (Element) segmentNodes.item(i);
            entity.addSegment(getSiftsSegment(segmentEl));
        }

        return entity;
    }

    /**
     * Builds a {@link SiftsSegment} from an element carrying attributes such
     * as {@code segId="4hhb_A_1_140" start="1" end="140"}, including every
     * {@code residue} found below its {@code listResidue} children.
     *
     * @param el the {@code segment} element
     * @return the segment with its residues attached
     */
    private SiftsSegment getSiftsSegment(Element el) {
        String segId = el.getAttribute("segId");
        String start = el.getAttribute("start");
        String end = el.getAttribute("end");
        SiftsSegment seg = new SiftsSegment(segId, start, end);

        if (debug) {
            System.out.println("parsed " + seg);
        }

        NodeList residueLists = el.getElementsByTagName("listResidue");
        for (int i = 0; i < residueLists.getLength(); i++) {
            Element listResidueEl = (Element) residueLists.item(i);

            NodeList residueNodes = listResidueEl.getElementsByTagName("residue");
            for (int j = 0; j < residueNodes.getLength(); j++) {
                Element residueEl = (Element) residueNodes.item(j);
                seg.addResidue(getResidue(residueEl));
            }
        }

        return seg;
    }

    /**
     * Builds a {@link SiftsResidue} from a {@code residue} element, e.g.
     *
     * <pre>{@code
     * <residue dbResNum="1" dbResName="THR">
     *   <crossRefDb dbSource="PDB" dbVersion="20101103"
     *       dbCoordSys="PDBresnum" dbAccessionId="1a4w" dbResNum="1H"
     *       dbResName="THR" dbChainId="L"></crossRefDb>
     *   <crossRefDb dbSource="UniProt" dbVersion="157-2"
     *       dbCoordSys="UniProt" dbAccessionId="P00734"
     *       dbResNum="328" dbResName="T"></crossRefDb>
     *   <crossRefDb dbSource="SCOP" dbVersion="1.75"
     *       dbCoordSys="PDBresnum" dbAccessionId="26083"
     *       dbResNum="1H" dbResName="THR" dbChainId="L"></crossRefDb>
     *   <residueDetail dbSource="MSD" property="Annotation">Not_Observed</residueDetail>
     * </residue>
     * }</pre>
     *
     * Only the PDB (coordinate system {@code PDBresnum}) and UniProt cross
     * references are read; other {@code crossRefDb} sources are skipped.
     *
     * @param residue the {@code residue} element
     * @return the populated residue
     * @throws NumberFormatException if the residue's own {@code dbResNum} or a
     *         UniProt {@code dbResNum} is not an integer
     */
    private SiftsResidue getResidue(Element residue) {
        SiftsResidue res = new SiftsResidue();

        String dbResNumS = residue.getAttribute("dbResNum");
        res.setNaturalPos(Integer.parseInt(dbResNumS));

        String seqResName = residue.getAttribute("dbResName");
        res.setSeqResName(seqResName);

        // Of all residueDetail annotations only "Not_Observed" is evaluated;
        // any other annotation is ignored.
        List<String> details = getTextValues(residue, "residueDetail");
        res.setNotObserved(containsIgnoringWhitespace(details, NOT_OBSERVED));

        NodeList crossRefs = residue.getElementsByTagName("crossRefDb");
        for (int i = 0; i < crossRefs.getLength(); i++) {
            Element crossRefEl = (Element) crossRefs.item(i);

            String dbSource = crossRefEl.getAttribute("dbSource");
            String dbCoordSys = crossRefEl.getAttribute("dbCoordSys");
            String dbAccessionId = crossRefEl.getAttribute("dbAccessionId");
            String dbResNum = crossRefEl.getAttribute("dbResNum");
            String dbResName = crossRefEl.getAttribute("dbResName");
            String dbChainId = crossRefEl.getAttribute("dbChainId");

            if ("PDB".equals(dbSource) && "PDBresnum".equals(dbCoordSys)) {
                res.setPdbResNum(dbResNum);
                res.setPdbResName(dbResName);
                res.setChainId(dbChainId);
                res.setPdbId(dbAccessionId);
            } else if ("UniProt".equals(dbSource)) {
                res.setUniProtPos(Integer.parseInt(dbResNum));
                res.setUniProtResName(dbResName);
                res.setUniProtAccessionId(dbAccessionId);
            }
        }
        return res;
    }

    /**
     * Tells whether any of the given values equals {@code wanted} once
     * leading and trailing whitespace is removed, so that element text that
     * was wrapped or indented in the XML file is still recognised.
     */
    private static boolean containsIgnoringWhitespace(List<String> values, String wanted) {
        for (String value : values) {
            if (wanted.equals(value.trim())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the value of the first child node of the first descendant
     * element named {@code tagName}. For
     * {@code <a><name>John</name></a>}, element {@code a} and tag name
     * {@code name} this yields {@code John}.
     *
     * @param ele     element to search below
     * @param tagName name of the descendant element
     * @return the text value, or {@code null} if there is no such element or
     *         it has no children
     */
    @SuppressWarnings("unused")
    private String getTextValue(Element ele, String tagName) {
        NodeList nl = ele.getElementsByTagName(tagName);
        if (nl.getLength() == 0) {
            return null;
        }
        return firstChildValue((Element) nl.item(0));
    }

    /**
     * Collects, for every descendant element named {@code tagName}, the
     * value of its first child node. Elements without children, or whose
     * first child has no value, contribute nothing.
     *
     * @param ele     element to search below
     * @param tagName name of the descendant elements
     * @return the values in document order; never {@code null}
     */
    private List<String> getTextValues(Element ele, String tagName) {
        List<String> values = new ArrayList<String>();
        NodeList nl = ele.getElementsByTagName(tagName);
        for (int i = 0; i < nl.getLength(); i++) {
            String val = firstChildValue((Element) nl.item(i));
            if (val != null) {
                values.add(val);
            }
        }
        return values;
    }

    /**
     * Returns the node value of the element's first child, or {@code null}
     * for an empty element such as {@code <residueDetail/>}.
     */
    private static String firstChildValue(Element el) {
        Node first = el.getFirstChild();
        return first == null ? null : first.getNodeValue();
    }

}