001/** 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Feb 22, 2012 021 * Created by Andreas Prlic 022 * 023 * @since 3.0.2 024 */ 025package org.biojava.nbio.structure.io.sifts; 026 027import org.slf4j.Logger; 028import org.slf4j.LoggerFactory; 029import org.w3c.dom.Document; 030import org.w3c.dom.Element; 031import org.w3c.dom.NodeList; 032import org.xml.sax.SAXException; 033 034import javax.xml.parsers.DocumentBuilder; 035import javax.xml.parsers.DocumentBuilderFactory; 036import javax.xml.parsers.ParserConfigurationException; 037 038import java.io.IOException; 039import java.io.InputStream; 040import java.util.ArrayList; 041import java.util.List; 042 043public class SiftsXMLParser { 044 045 private final static Logger logger = LoggerFactory.getLogger(SiftsXMLParser.class); 046 047 048 049 Document dom; 050 List<SiftsEntity> entities; 051 052 static boolean debug = false; 053 public SiftsXMLParser(){ 054 entities = new ArrayList<SiftsEntity>(); 055 } 056 057 public List<SiftsEntity> getEntities(){ 058 return entities; 059 } 060 061 062 public void parseXmlFile(InputStream is){ 063 entities = new ArrayList<SiftsEntity>(); 064 065 //get the factory 066 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 067 068 try { 069 070 //Using factory get an instance of document builder 071 DocumentBuilder db = dbf.newDocumentBuilder(); 072 073 //parse using builder to get DOM representation of the XML file 074 dom = db.parse(is); 075 076 parseDocument(); 077 078 }catch(ParserConfigurationException pce) { 079 pce.printStackTrace(); 080 }catch(SAXException se) { 081 se.printStackTrace(); 082 }catch(IOException ioe) { 083 ioe.printStackTrace(); 084 } 085 } 086 087 088 089 private void parseDocument(){ 090 //get the root element 091 Element docEle = dom.getDocumentElement(); 092 093 //get a nodelist of entities 094 095 NodeList nl = docEle.getElementsByTagName("entity"); 096 if(nl != null && nl.getLength() > 0) { 097 for(int i = 0 ; i < nl.getLength();i++) { 098 099 //get the entity element 100 Element el = (Element)nl.item(i); 101 //get the Employee object 102 SiftsEntity e = getSiftsEntity(el); 103 104 //add it to list 105 entities.add(e); 106 } 107 } 108 } 109 110 /** 111 * <entity type="protein" entityId="A"> 112 */ 113 private SiftsEntity getSiftsEntity(Element empEl) { 114 115 //for each <employee> element get text or int values of 116 //name ,id, age and name 117 118 String type = empEl.getAttribute("type"); 119 String entityId = empEl.getAttribute("entityId"); 120 121 //Create a new Employee with the value read from the xml nodes 122 SiftsEntity entity = new SiftsEntity(type,entityId); 123 124 // get nodelist of segments... 125 NodeList nl = empEl.getElementsByTagName("segment"); 126 if(nl != null && nl.getLength() > 0) { 127 for(int i = 0 ; i < nl.getLength();i++) { 128 129 //get the entity element 130 Element el = (Element)nl.item(i); 131 132 SiftsSegment s = getSiftsSegment(el); 133 134 logger.debug("new segment: " + s); 135 entity.addSegment(s); 136 137 } 138 } 139 140 logger.debug("new SIFTS entity: " + entity); 141 return entity; 142 } 143 144 /** segId="4hhb_A_1_140" start="1" end="140" 145 * 146 * @param el 147 * @return 148 */ 149 private SiftsSegment getSiftsSegment(Element el) { 150 151 String segId = el.getAttribute("segId"); 152 String start = el.getAttribute("start"); 153 String end = el.getAttribute("end"); 154 SiftsSegment seg = new SiftsSegment(segId,start,end); 155 156 if ( debug ) 157 System.out.println("parsed " + seg); 158 159 // get nodelist of segments... 160 NodeList nl = el.getElementsByTagName("listResidue"); 161 if(nl != null && nl.getLength() > 0) { 162 for(int i = 0 ; i < nl.getLength();i++) { 163 //get the entity element 164 Element listResidueEl = (Element)nl.item(i); 165 166 NodeList residueNodes = listResidueEl.getElementsByTagName("residue"); 167 if(residueNodes != null && residueNodes.getLength() > 0) { 168 for(int j = 0 ; j < residueNodes.getLength();j++) { 169 Element residue = (Element) residueNodes.item(j); 170 171 SiftsResidue pos = getResidue(residue); 172 seg.addResidue(pos); 173 } 174 } 175 176 } 177 } 178 179 180 return seg; 181 } 182 183 /** 184 * <residue dbResNum="1" dbResName="THR"> 185 <crossRefDb dbSource="PDB" dbVersion="20101103" 186 dbCoordSys="PDBresnum" dbAccessionId="1a4w" dbResNum="1H" 187 dbResName="THR" dbChainId="L"></crossRefDb> 188 <crossRefDb dbSource="UniProt" dbVersion="157-2" 189 dbCoordSys="UniProt" dbAccessionId="P00734" 190 dbResNum="328" dbResName="T"></crossRefDb> 191 <crossRefDb dbSource="SCOP" dbVersion="1.75" 192 dbCoordSys="PDBresnum" dbAccessionId="26083" 193 dbResNum="1H" dbResName="THR" dbChainId="L"></crossRefDb> 194 <residueDetail dbSource="MSD" property="Annotation"> 195 Not_Observed</residueDetail> 196 </residue> 197 198 */ 199 private SiftsResidue getResidue(Element residue) { 200 201 SiftsResidue res = new SiftsResidue(); 202 203 String dbResNumS = residue.getAttribute("dbResNum"); 204 res.setNaturalPos(Integer.parseInt(dbResNumS)); 205 206 String seqResName = residue.getAttribute("dbResName"); 207 res.setSeqResName(seqResName); 208 209 boolean observed = true; 210 211 List<String> details = getTextValues(residue, "residueDetail"); 212 213 if ( details != null && details.contains("Not_Observed")){ 214 observed = false; 215 } 216 res.setNotObserved(! observed); 217 //else if ( detail != null && detail.trim().equalsIgnoreCase("Conflict")){ 218 // 219 //} 220 221 NodeList nl = residue.getElementsByTagName("crossRefDb"); 222 if(nl != null && nl.getLength() > 0) { 223 for(int i = 0 ; i < nl.getLength();i++) { 224 //get the entity element 225 Element crossRefEl = (Element)nl.item(i); 226 227 String dbSource = crossRefEl.getAttribute("dbSource"); 228 String dbCoordSys = crossRefEl.getAttribute("dbCoordSys"); 229 String dbAccessionId = crossRefEl.getAttribute("dbAccessionId"); 230 String dbResNum = crossRefEl.getAttribute("dbResNum"); 231 String dbResName = crossRefEl.getAttribute("dbResName"); 232 String dbChainId = crossRefEl.getAttribute("dbChainId"); 233 234 // System.out.println(dbSource + " " + dbCoordSys + " " + dbAccessionId + " " + dbResNum + " " + dbResName + " " + dbChainId); 235 236 if ( dbSource.equals("PDB") && ( dbCoordSys.equals("PDBresnum"))){ 237 res.setPdbResNum(dbResNum); 238 res.setPdbResName(dbResName); 239 res.setChainId(dbChainId); 240 res.setPdbId(dbAccessionId); 241 } else if ( dbSource.equals("UniProt")){ 242 res.setUniProtPos(Integer.parseInt(dbResNum)); 243 res.setUniProtResName(dbResName); 244 res.setUniProtAccessionId(dbAccessionId); 245 } 246 } 247 } 248 return res; 249 } 250 251 252 253 /** 254 * I take a xml element and the tag name, look for the tag and get 255 * the text content 256 * i.e for <employee><name>John</name></employee> xml snippet if 257 * the Element points to employee node and tagName is 'name' I will return John 258 */ 259 @SuppressWarnings("unused") 260 private String getTextValue(Element ele, String tagName) { 261 String textVal = null; 262 NodeList nl = ele.getElementsByTagName(tagName); 263 if(nl != null && nl.getLength() > 0) { 264 Element el = (Element)nl.item(0); 265 textVal = el.getFirstChild().getNodeValue(); 266 } 267 268 return textVal; 269 } 270 271 private List<String> getTextValues(Element ele, String tagName) { 272 List<String>values = new ArrayList<String>(); 273 NodeList nl = ele.getElementsByTagName(tagName); 274 if(nl != null && nl.getLength() > 0) { 275 for ( int i = 0 ;i < nl.getLength() ; i ++) { 276 277 Element n = (Element) nl.item(i); 278 279 @SuppressWarnings("unused") 280 String k = n.getNodeName(); 281 282 String val = n.getFirstChild().getNodeValue(); 283 if ( val != null) 284 values.add(val); 285 } 286 } 287 288 return values; 289 } 290 291 292 293 294 295 296 297 }