001/**
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Feb 22, 2012
021 * Created by Andreas Prlic
022 *
023 * @since 3.0.2
024 */
025package org.biojava.nbio.structure.io.sifts;
026
027import org.w3c.dom.Document;
028import org.w3c.dom.Element;
029import org.w3c.dom.NodeList;
030import org.xml.sax.SAXException;
031
032import javax.xml.parsers.DocumentBuilder;
033import javax.xml.parsers.DocumentBuilderFactory;
034import javax.xml.parsers.ParserConfigurationException;
035
036import java.io.IOException;
037import java.io.InputStream;
038import java.util.ArrayList;
039import java.util.List;
040
041public class SiftsXMLParser {
042
043        Document dom;
044        List<SiftsEntity> entities;
045
046        static boolean debug = false;
047        public SiftsXMLParser(){
048                entities = new ArrayList<SiftsEntity>();
049        }
050
051        public List<SiftsEntity> getEntities(){
052                return entities;
053        }
054
055
056        public void parseXmlFile(InputStream is){
057                entities = new ArrayList<SiftsEntity>();
058
059                //get the factory
060                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
061
062                try {
063
064                        //Using factory get an instance of document builder
065                        DocumentBuilder db = dbf.newDocumentBuilder();
066
067                        //parse using builder to get DOM representation of the XML file
068                        dom = db.parse(is);
069
070                        parseDocument();
071
072                }catch(ParserConfigurationException pce) {
073                        pce.printStackTrace();
074                }catch(SAXException se) {
075                        se.printStackTrace();
076                }catch(IOException ioe) {
077                        ioe.printStackTrace();
078                }
079        }
080
081
082
083                private void parseDocument(){
084                        //get the root element
085                        Element docEle = dom.getDocumentElement();
086
087                        //get a nodelist of  entities
088
089                        NodeList nl = docEle.getElementsByTagName("entity");
090                        if(nl != null && nl.getLength() > 0) {
091                                for(int i = 0 ; i < nl.getLength();i++) {
092
093                                        //get the entity element
094                                        Element el = (Element)nl.item(i);
095                                        //get the Employee object
096                                        SiftsEntity e = getSiftsEntity(el);
097
098                                        //add it to list
099                                        entities.add(e);
100                                }
101                        }
102                }
103
104                /**
105                 * <entity type="protein" entityId="A">
106                 */
107                private SiftsEntity getSiftsEntity(Element empEl) {
108
109                        //for each <employee> element get text or int values of
110                        //name ,id, age and name
111
112                        String type = empEl.getAttribute("type");
113                        String entityId = empEl.getAttribute("entityId");
114
115                        //Create a new Employee with the value read from the xml nodes
116                        SiftsEntity entity = new SiftsEntity(type,entityId);
117
118                        // get nodelist of segments...
119                        NodeList nl = empEl.getElementsByTagName("segment");
120                        if(nl != null && nl.getLength() > 0) {
121                                for(int i = 0 ; i < nl.getLength();i++) {
122
123                                        //get the entity element
124                                        Element el = (Element)nl.item(i);
125
126                                        SiftsSegment s = getSiftsSegment(el);
127
128                                        entity.addSegment(s);
129
130                                }
131                        }
132
133                        return entity;
134                }
135
136                /** segId="4hhb_A_1_140" start="1" end="140"
137                 *
138                 * @param el
139                 * @return
140                 */
141                private SiftsSegment getSiftsSegment(Element el) {
142
143                        String segId = el.getAttribute("segId");
144                        String start = el.getAttribute("start");
145                        String end = el.getAttribute("end");
146                        SiftsSegment seg = new SiftsSegment(segId,start,end);
147
148                        if ( debug )
149                                System.out.println("parsed " + seg);
150
151                        // get nodelist of segments...
152                        NodeList nl = el.getElementsByTagName("listResidue");
153                        if(nl != null && nl.getLength() > 0) {
154                                for(int i = 0 ; i < nl.getLength();i++) {
155                                        //get the entity element
156                                        Element listResidueEl = (Element)nl.item(i);
157
158                                        NodeList residueNodes = listResidueEl.getElementsByTagName("residue");
159                                        if(residueNodes != null && residueNodes.getLength() > 0) {
160                                                for(int j = 0 ; j < residueNodes.getLength();j++) {
161                                                        Element residue = (Element) residueNodes.item(j);
162
163                                                        SiftsResidue pos = getResidue(residue);
164                                                        seg.addResidue(pos);
165                                                }
166                                        }
167
168                                }
169                        }
170
171
172                        return seg;
173                }
174
175                /**
176                 *  <residue dbResNum="1" dbResName="THR">
177                                        <crossRefDb dbSource="PDB" dbVersion="20101103"
178                                        dbCoordSys="PDBresnum" dbAccessionId="1a4w" dbResNum="1H"
179                                        dbResName="THR" dbChainId="L"></crossRefDb>
180                                        <crossRefDb dbSource="UniProt" dbVersion="157-2"
181                                        dbCoordSys="UniProt" dbAccessionId="P00734"
182                                        dbResNum="328" dbResName="T"></crossRefDb>
183                                        <crossRefDb dbSource="SCOP" dbVersion="1.75"
184                                        dbCoordSys="PDBresnum" dbAccessionId="26083"
185                                        dbResNum="1H" dbResName="THR" dbChainId="L"></crossRefDb>
186                                        <residueDetail dbSource="MSD" property="Annotation">
187                                        Not_Observed</residueDetail>
188                                </residue>
189
190                 */
191                private SiftsResidue getResidue(Element residue) {
192
193                        SiftsResidue res = new SiftsResidue();
194
195                        String dbResNumS = residue.getAttribute("dbResNum");
196                        res.setNaturalPos(Integer.parseInt(dbResNumS));
197
198                        String seqResName = residue.getAttribute("dbResName");
199                        res.setSeqResName(seqResName);
200
201                        boolean observed = true;
202
203                        List<String> details = getTextValues(residue, "residueDetail");
204
205                        if ( details != null && details.contains("Not_Observed")){
206                                observed = false;
207                        }
208                        res.setNotObserved(! observed);
209                        //else if ( detail != null && detail.trim().equalsIgnoreCase("Conflict")){
210                                //
211                        //}
212
213                        NodeList nl = residue.getElementsByTagName("crossRefDb");
214                        if(nl != null && nl.getLength() > 0) {
215                                for(int i = 0 ; i < nl.getLength();i++) {
216                                        //get the entity element
217                                        Element crossRefEl = (Element)nl.item(i);
218
219                                        String dbSource = crossRefEl.getAttribute("dbSource");
220                                        String dbCoordSys = crossRefEl.getAttribute("dbCoordSys");
221                                        String dbAccessionId = crossRefEl.getAttribute("dbAccessionId");
222                                        String dbResNum = crossRefEl.getAttribute("dbResNum");
223                                        String dbResName = crossRefEl.getAttribute("dbResName");
224                                        String dbChainId = crossRefEl.getAttribute("dbChainId");
225
226                                //      System.out.println(dbSource + " " + dbCoordSys + " " + dbAccessionId + " " + dbResNum + " " + dbResName + " " + dbChainId);
227
228                                        if ( dbSource.equals("PDB") && ( dbCoordSys.equals("PDBresnum"))){
229                                                res.setPdbResNum(dbResNum);
230                                                res.setPdbResName(dbResName);
231                                                res.setChainId(dbChainId);
232                                                res.setPdbId(dbAccessionId);
233                                        } else if ( dbSource.equals("UniProt")){
234                                                res.setUniProtPos(Integer.parseInt(dbResNum));
235                                                res.setUniProtResName(dbResName);
236                                                res.setUniProtAccessionId(dbAccessionId);
237                                        }
238                                }
239                        }
240                        return res;
241                }
242
243
244
245                /**
246                 * I take a xml element and the tag name, look for the tag and get
247                 * the text content
248                 * i.e for <employee><name>John</name></employee> xml snippet if
249                 * the Element points to employee node and tagName is 'name' I will return John
250                 */
251                @SuppressWarnings("unused")
252                private String getTextValue(Element ele, String tagName) {
253                        String textVal = null;
254                        NodeList nl = ele.getElementsByTagName(tagName);
255                        if(nl != null && nl.getLength() > 0) {
256                                Element el = (Element)nl.item(0);
257                                textVal = el.getFirstChild().getNodeValue();
258                        }
259
260                        return textVal;
261                }
262
263        private List<String> getTextValues(Element ele, String tagName) {
264                List<String>values = new ArrayList<String>();
265                NodeList nl = ele.getElementsByTagName(tagName);
266                if(nl != null && nl.getLength() > 0) {
267                        for ( int i = 0 ;i < nl.getLength() ; i ++) {
268
269                                Element n = (Element) nl.item(i);
270
271                                @SuppressWarnings("unused")
272                                String k = n.getNodeName();
273
274                                String val = n.getFirstChild().getNodeValue();
275                                if ( val != null)
276                                        values.add(val);
277                        }
278                }
279
280                return values;
281        }
282
283
284
285
286
287
288
289        }