001/**
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Feb 22, 2012
021 * Created by Andreas Prlic
022 *
023 * @since 3.0.2
024 */
025package org.biojava.nbio.structure.io.sifts;
026
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029import org.w3c.dom.Document;
030import org.w3c.dom.Element;
031import org.w3c.dom.NodeList;
032import org.xml.sax.SAXException;
033
034import javax.xml.parsers.DocumentBuilder;
035import javax.xml.parsers.DocumentBuilderFactory;
036import javax.xml.parsers.ParserConfigurationException;
037
038import java.io.IOException;
039import java.io.InputStream;
040import java.util.ArrayList;
041import java.util.List;
042
043public class SiftsXMLParser {
044
045        private final static Logger logger = LoggerFactory.getLogger(SiftsXMLParser.class);
046
047
048
049        Document dom;
050        List<SiftsEntity> entities;
051
052        static boolean debug = false;
053        public SiftsXMLParser(){
054                entities = new ArrayList<SiftsEntity>();
055        }
056
057        public List<SiftsEntity> getEntities(){
058                return entities;
059        }
060
061
062        public void parseXmlFile(InputStream is){
063                entities = new ArrayList<SiftsEntity>();
064
065                //get the factory
066                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
067
068                try {
069
070                        //Using factory get an instance of document builder
071                        DocumentBuilder db = dbf.newDocumentBuilder();
072
073                        //parse using builder to get DOM representation of the XML file
074                        dom = db.parse(is);
075
076                        parseDocument();
077
078                }catch(ParserConfigurationException pce) {
079                        pce.printStackTrace();
080                }catch(SAXException se) {
081                        se.printStackTrace();
082                }catch(IOException ioe) {
083                        ioe.printStackTrace();
084                }
085        }
086
087
088
089                private void parseDocument(){
090                        //get the root element
091                        Element docEle = dom.getDocumentElement();
092
093                        //get a nodelist of  entities
094
095                        NodeList nl = docEle.getElementsByTagName("entity");
096                        if(nl != null && nl.getLength() > 0) {
097                                for(int i = 0 ; i < nl.getLength();i++) {
098
099                                        //get the entity element
100                                        Element el = (Element)nl.item(i);
101                                        //get the Employee object
102                                        SiftsEntity e = getSiftsEntity(el);
103
104                                        //add it to list
105                                        entities.add(e);
106                                }
107                        }
108                }
109
110                /**
111                 * <entity type="protein" entityId="A">
112                 */
113                private SiftsEntity getSiftsEntity(Element empEl) {
114
115                        //for each <employee> element get text or int values of
116                        //name ,id, age and name
117
118                        String type = empEl.getAttribute("type");
119                        String entityId = empEl.getAttribute("entityId");
120
121                        //Create a new Employee with the value read from the xml nodes
122                        SiftsEntity entity = new SiftsEntity(type,entityId);
123
124                        // get nodelist of segments...
125                        NodeList nl = empEl.getElementsByTagName("segment");
126                        if(nl != null && nl.getLength() > 0) {
127                                for(int i = 0 ; i < nl.getLength();i++) {
128
129                                        //get the entity element
130                                        Element el = (Element)nl.item(i);
131
132                                        SiftsSegment s = getSiftsSegment(el);
133
134                                        logger.debug("new segment: " + s);
135                                        entity.addSegment(s);
136
137                                }
138                        }
139
140                        logger.debug("new SIFTS entity: " + entity);
141                        return entity;
142                }
143
144                /** segId="4hhb_A_1_140" start="1" end="140"
145                 *
146                 * @param el
147                 * @return
148                 */
149                private SiftsSegment getSiftsSegment(Element el) {
150
151                        String segId = el.getAttribute("segId");
152                        String start = el.getAttribute("start");
153                        String end = el.getAttribute("end");
154                        SiftsSegment seg = new SiftsSegment(segId,start,end);
155
156                        if ( debug )
157                                System.out.println("parsed " + seg);
158
159                        // get nodelist of segments...
160                        NodeList nl = el.getElementsByTagName("listResidue");
161                        if(nl != null && nl.getLength() > 0) {
162                                for(int i = 0 ; i < nl.getLength();i++) {
163                                        //get the entity element
164                                        Element listResidueEl = (Element)nl.item(i);
165
166                                        NodeList residueNodes = listResidueEl.getElementsByTagName("residue");
167                                        if(residueNodes != null && residueNodes.getLength() > 0) {
168                                                for(int j = 0 ; j < residueNodes.getLength();j++) {
169                                                        Element residue = (Element) residueNodes.item(j);
170
171                                                        SiftsResidue pos = getResidue(residue);
172                                                        seg.addResidue(pos);
173                                                }
174                                        }
175
176                                }
177                        }
178
179
180                        return seg;
181                }
182
183                /**
184                 *  <residue dbResNum="1" dbResName="THR">
185                                        <crossRefDb dbSource="PDB" dbVersion="20101103"
186                                        dbCoordSys="PDBresnum" dbAccessionId="1a4w" dbResNum="1H"
187                                        dbResName="THR" dbChainId="L"></crossRefDb>
188                                        <crossRefDb dbSource="UniProt" dbVersion="157-2"
189                                        dbCoordSys="UniProt" dbAccessionId="P00734"
190                                        dbResNum="328" dbResName="T"></crossRefDb>
191                                        <crossRefDb dbSource="SCOP" dbVersion="1.75"
192                                        dbCoordSys="PDBresnum" dbAccessionId="26083"
193                                        dbResNum="1H" dbResName="THR" dbChainId="L"></crossRefDb>
194                                        <residueDetail dbSource="MSD" property="Annotation">
195                                        Not_Observed</residueDetail>
196                                </residue>
197
198                 */
199                private SiftsResidue getResidue(Element residue) {
200
201                        SiftsResidue res = new SiftsResidue();
202
203                        String dbResNumS = residue.getAttribute("dbResNum");
204                        res.setNaturalPos(Integer.parseInt(dbResNumS));
205
206                        String seqResName = residue.getAttribute("dbResName");
207                        res.setSeqResName(seqResName);
208
209                        boolean observed = true;
210
211                        List<String> details = getTextValues(residue, "residueDetail");
212
213                        if ( details != null && details.contains("Not_Observed")){
214                                observed = false;
215                        }
216                        res.setNotObserved(! observed);
217                        //else if ( detail != null && detail.trim().equalsIgnoreCase("Conflict")){
218                                //
219                        //}
220
221                        NodeList nl = residue.getElementsByTagName("crossRefDb");
222                        if(nl != null && nl.getLength() > 0) {
223                                for(int i = 0 ; i < nl.getLength();i++) {
224                                        //get the entity element
225                                        Element crossRefEl = (Element)nl.item(i);
226
227                                        String dbSource = crossRefEl.getAttribute("dbSource");
228                                        String dbCoordSys = crossRefEl.getAttribute("dbCoordSys");
229                                        String dbAccessionId = crossRefEl.getAttribute("dbAccessionId");
230                                        String dbResNum = crossRefEl.getAttribute("dbResNum");
231                                        String dbResName = crossRefEl.getAttribute("dbResName");
232                                        String dbChainId = crossRefEl.getAttribute("dbChainId");
233
234                                //      System.out.println(dbSource + " " + dbCoordSys + " " + dbAccessionId + " " + dbResNum + " " + dbResName + " " + dbChainId);
235
236                                        if ( dbSource.equals("PDB") && ( dbCoordSys.equals("PDBresnum"))){
237                                                res.setPdbResNum(dbResNum);
238                                                res.setPdbResName(dbResName);
239                                                res.setChainId(dbChainId);
240                                                res.setPdbId(dbAccessionId);
241                                        } else if ( dbSource.equals("UniProt")){
242                                                res.setUniProtPos(Integer.parseInt(dbResNum));
243                                                res.setUniProtResName(dbResName);
244                                                res.setUniProtAccessionId(dbAccessionId);
245                                        }
246                                }
247                        }
248                        return res;
249                }
250
251
252
253                /**
254                 * I take a xml element and the tag name, look for the tag and get
255                 * the text content
256                 * i.e for <employee><name>John</name></employee> xml snippet if
257                 * the Element points to employee node and tagName is 'name' I will return John
258                 */
259                @SuppressWarnings("unused")
260                private String getTextValue(Element ele, String tagName) {
261                        String textVal = null;
262                        NodeList nl = ele.getElementsByTagName(tagName);
263                        if(nl != null && nl.getLength() > 0) {
264                                Element el = (Element)nl.item(0);
265                                textVal = el.getFirstChild().getNodeValue();
266                        }
267
268                        return textVal;
269                }
270
271        private List<String> getTextValues(Element ele, String tagName) {
272                List<String>values = new ArrayList<String>();
273                NodeList nl = ele.getElementsByTagName(tagName);
274                if(nl != null && nl.getLength() > 0) {
275                        for ( int i = 0 ;i < nl.getLength() ; i ++) {
276
277                                Element n = (Element) nl.item(i);
278
279                                @SuppressWarnings("unused")
280                                String k = n.getNodeName();
281
282                                String val = n.getFirstChild().getNodeValue();
283                                if ( val != null)
284                                        values.add(val);
285                        }
286                }
287
288                return values;
289        }
290
291
292
293
294
295
296
297        }