001/**
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
005 * should be distributed with the code. If you do not have a copy, see:
006 *
007 * http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
010 *
011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
012 * at:
013 *
014 * http://www.biojava.org/
015 *
016 * Created on 2013-06-13 Created by Douglas Myers-Turnbull
017 *
018 * @since 3.0.6
019 */
020package org.biojava.nbio.structure.rcsb;
021
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024import org.w3c.dom.Element;
025import org.w3c.dom.NodeList;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.net.URL;
030import java.util.ArrayList;
031import java.util.List;
032
033/**
034 * Fetches information from <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB's RESTful Web Service
035 * Interface</a>. A factory for {@link RCSBLigands RCSBLigands} from {@code ligandInfo} XML files. The factory methods
036 * will return null if the data was not found (rather than throwing an exception); client code should test for this.
037 * This is for consistency: if the factory could not read some part (corresponding to a field in a class in
038 * {@code rcsb.descriptions}) of the XML file, either because it was blank or contained an error that could not be
039 * safely ignored, that field will simply be null. This holds even for numerical values. On some parse errors, the error
040 * will additionally be printed to standard error.
041 *
042 * Example usage:
043 *
044 * <pre>
045 * RCSBLigands ligands = RCSBLigandsFactory.getFromPdbIds(&quot;1w0p&quot;);
046 * List&lt;RCSBLigand&gt; list = ligands.getLigands();
047 * System.out.println(list.get(0).getFormula()); // prints &quot;CA 2&quot;
048 * System.out.println(list.get(1).getFormula()); // prints &quot;C11 H19 N O9&quot;
049 * </pre>
050 *
051 * @see <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB RESTful</a>
052 *
053 * @author dmyerstu
054 * @since 3.0.6
055 */
056
057public class RCSBLigandsFactory {
058
059        private static final String HET_URL_STUB = "http://www.rcsb.org/pdb/rest/describeHet?chemicalID=";
060
061        private static final Logger logger = LoggerFactory.getLogger(RCSBLigandsFactory.class);
062
063        private static final String PDB_URL_STUB = "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=";
064
065        /**
066         * @return A list of {@link RCSBLigand RCSBLigands} from the XML file loaded as {@code stream}. Prefer calling
067         *         {@link #getFromHeteroAtomId(String)} if you want data directly from RCSB's RESTful service.
068         * @see RCSBDescriptionFactory#get(String)
069         */
070        public static RCSBLigand getFromHeteroAtomId(InputStream stream) {
071                return getFromHeteroAtomIds(stream).get(0);
072        }
073
074        /**
075         * @return An {@link RCSBLigands} from the XML file at
076         *         {@code "http://www.pdb.org/pdb/rest/describeHet?chemicalID=hetid"}. This is the preferred factory method,
077         *         unless a different URL or input source is required.
078         * @see RCSBDescriptionFactory#get(InputStream)
079         */
080        public static RCSBLigand getFromHeteroAtomId(String heteroAtomId) {
081                return getFromHeteroAtomIds(heteroAtomId).get(0);
082        }
083
084        /**
085         * @return A list of {@link RCSBLigand RCSBLigands} from the XML file loaded as {@code stream}. Prefer calling
086         *         {@link #getFromHeteroAtomId(String)} if you want data directly from RCSB's RESTful service.
087         * @see RCSBDescriptionFactory#get(String)
088         */
089        public static List<RCSBLigand> getFromHeteroAtomIds(InputStream stream) {
090
091                NodeList data;
092                try {
093                        data = ReadUtils.getNodes(stream);
094                } catch (IOException e) {
095                        logger.warn("Couldn't parse XML", e);
096                        return null;
097                }
098
099                List<RCSBLigand> ligands = new ArrayList<RCSBLigand>();
100
101                // first get the ligandInfo
102                Element structureIdE = null;
103                for (int i = 0; i < data.getLength(); i++) {
104                        if (data.item(i).getNodeType() != 1) continue;
105                        structureIdE = (Element) data.item(i);
106                        if (structureIdE.getNodeName().equals("ligandInfo")) {
107                                break;
108                        }
109                }
110
111                // now get individual ligands
112                data = structureIdE.getChildNodes();
113                Element ligandE = null;
114                for (int i = 0; i < data.getLength(); i++) {
115                        if (data.item(i).getNodeType() != 1) continue;
116                        ligandE = (Element) data.item(i);
117                        if (ligandE.getNodeName().equals("ligand")) {
118                                RCSBLigand ligand = makeLigand(ligandE);
119                                ligands.add(ligand);
120                        }
121                }
122
123                return ligands;
124
125        }
126
127        /**
128         * @return An {@link RCSBLigands} from the XML file at
129         *         {@code "http://www.pdb.org/pdb/rest/describeHet?chemicalID=hetid"}. This is the preferred factory method,
130         *         unless a different URL or input source is required.
131         * @see RCSBDescriptionFactory#get(InputStream)
132         */
133        public static List<RCSBLigand> getFromHeteroAtomIds(List<String> heteroAtomIds) {
134                String[] x = new String[heteroAtomIds.size()];
135                heteroAtomIds.toArray(x);
136                return getFromHeteroAtomIds(x); // somewhat cheating here
137        }
138
139        /**
140         * @return An {@link RCSBLigands} from the XML file at
141         *         {@code "http://www.pdb.org/pdb/rest/describeHet?chemicalID=hetid"}. This is the preferred factory method,
142         *         unless a different URL or input source is required.
143         * @see RCSBDescriptionFactory#get(InputStream)
144         */
145        public static List<RCSBLigand> getFromHeteroAtomIds(String... heteroAtomIds) {
146                StringBuilder sb = new StringBuilder();
147                for (int i = 0; i < heteroAtomIds.length; i++) {
148                        if (i > 0) sb.append(",");
149                        sb.append(heteroAtomIds[i]);
150                }
151                InputStream is;
152                try {
153                        URL url = new URL(HET_URL_STUB + sb.toString());
154                        is = url.openConnection().getInputStream();
155                } catch (IOException e) {
156                        logger.warn("Couldn't open connection", e);
157                        return null;
158                }
159                return getFromHeteroAtomIds(is);
160        }
161
162        /**
163         * @return An {@link RCSBLigands} from the XML file loaded as {@code stream}. Prefer calling
164         *         {@link #getFromPdbId(String)} if you want data directly from RCSB's RESTful service.
165         * @see RCSBDescriptionFactory#get(String)
166         */
167        public static RCSBLigands getFromPdbId(InputStream stream) {
168
169                NodeList data;
170                try {
171                        data = ReadUtils.getNodes(stream);
172                } catch (IOException e) {
173                        logger.warn("Couldn't parse XML", e);
174                        return null;
175                }
176
177                // first get the ligandInfo
178                RCSBLigands ligands = new RCSBLigands();
179                Element structureIdE = null;
180                for (int i = 0; i < data.getLength(); i++) {
181                        if (data.item(i).getNodeType() != 1) continue;
182                        structureIdE = (Element) data.item(i);
183                        if (structureIdE.getNodeName().equals("ligandInfo")) {
184                                break;
185                        }
186                }
187
188                // now get individual ligands
189                data = structureIdE.getChildNodes();
190                Element ligandE = null;
191                for (int i = 0; i < data.getLength(); i++) {
192                        if (data.item(i).getNodeType() != 1) continue;
193                        ligandE = (Element) data.item(i);
194                        if (ligandE.getNodeName().equals("ligand")) {
195                                if (ligands.getPdbId() == null) {
196                                        ligands.setPdbId(ligandE.getAttribute("structureId"));
197                                }
198                                RCSBLigand ligand = makeLigand(ligandE);
199                                ligands.addLigand(ligand);
200                        }
201                }
202
203                return ligands;
204
205        }
206
207        /**
208         * @return An {@link RCSBLigands} from the XML file at
209         *         {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory
210         *         method, unless a different URL or input source is required.
211         * @see RCSBDescriptionFactory#get(InputStream)
212         */
213        public static RCSBLigands getFromPdbId(String pdbId) {
214                InputStream is;
215                try {
216                        URL url = new URL(PDB_URL_STUB + pdbId);
217                        is = url.openConnection().getInputStream();
218                } catch (IOException e) {
219                        logger.warn("Couldn't open connection", e);
220                        return null;
221                }
222                return getFromPdbId(is);
223        }
224
225        /**
226         * @return An {@link RCSBLigands} from the XML file loaded as {@code stream}. Prefer calling
227         *         {@link #getFromPdbId(String)} if you want data directly from RCSB's RESTful service.
228         * @see RCSBDescriptionFactory#get(String)
229         */
230        public static List<RCSBLigands> getFromPdbIds(InputStream stream) {
231
232                NodeList dataaa;
233                try {
234                        dataaa = ReadUtils.getNodes(stream);
235                } catch (IOException e) {
236                        logger.warn("Couldn't parse XML", e);
237                        return null;
238                }
239
240                // first we have to handle the element "ligandsInEntry", which is not present if we have only 1 structure
241
242                List<RCSBLigands> ligandsList = new ArrayList<RCSBLigands>();
243
244                Element structureIdE = null;
245
246                for (int k = 0; k < dataaa.getLength(); k++) {
247
248                        if (dataaa.item(k).getNodeType() != 1) continue;
249                        structureIdE = (Element) dataaa.item(k);
250                        if (structureIdE.getNodeName().equals("structureId")) {
251
252                                // now get the ligandInfo
253                                NodeList data = structureIdE.getChildNodes();
254                                RCSBLigands ligands = new RCSBLigands();
255                                Element ligandIdE = null;
256                                for (int i = 0; i < data.getLength(); i++) {
257                                        if (data.item(i).getNodeType() != 1) continue;
258                                        ligandIdE = (Element) data.item(i);
259                                        if (ligandIdE.getNodeName().equals("ligandInfo")) {
260                                                break;
261                                        }
262                                }
263
264                                // now get individual ligands
265                                data = ligandIdE.getChildNodes();
266                                Element ligandE = null;
267                                for (int i = 0; i < data.getLength(); i++) {
268                                        if (data.item(i).getNodeType() != 1) continue;
269                                        ligandE = (Element) data.item(i);
270                                        if (ligandE.getNodeName().equals("ligand")) {
271                                                if (ligands.getPdbId() == null) {
272                                                        ligands.setPdbId(ligandE.getAttribute("structureId"));
273                                                }
274                                                RCSBLigand ligand = makeLigand(ligandE);
275                                                ligands.addLigand(ligand);
276                                        }
277                                }
278
279                                ligandsList.add(ligands);
280
281                        }
282                }
283
284                return ligandsList;
285
286        }
287
288        /**
289         * @return An {@link RCSBLigands} from the XML file at
290         *         {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory
291         *         method, unless a different URL or input source is required.
292         * @see RCSBDescriptionFactory#get(InputStream)
293         */
294        public static List<RCSBLigands> getFromPdbIds(List<String> pdbIds) {
295                String[] x = new String[pdbIds.size()];
296                pdbIds.toArray(x);
297                return getFromPdbIds(x);
298        }
299
300        /**
301         * @return An {@link RCSBLigands} from the XML file at
302         *         {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory
303         *         method, unless a different URL or input source is required.
304         * @see RCSBDescriptionFactory#get(InputStream)
305         */
306        public static RCSBLigands getFromPdbIds(String pdbId) {
307                InputStream is;
308                try {
309                        URL url = new URL(PDB_URL_STUB + pdbId);
310                        is = url.openConnection().getInputStream();
311                } catch (IOException e) {
312                        logger.warn("Couldn't open connection", e);
313                        return null;
314                }
315                return getFromPdbId(is);
316        }
317
318        /**
319         * @return An {@link RCSBLigands} from the XML file at
320         *         {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory
321         *         method, unless a different URL or input source is required.
322         * @see RCSBDescriptionFactory#get(InputStream)
323         */
324        public static List<RCSBLigands> getFromPdbIds(String... pdbIds) {
325                InputStream is;
326                StringBuilder sb = new StringBuilder();
327                for (int i = 0; i < pdbIds.length; i++) {
328                        if (i > 0) sb.append(",");
329                        sb.append(pdbIds[i]);
330                }
331                try {
332                        URL url = new URL(PDB_URL_STUB + sb.toString());
333                        is = url.openConnection().getInputStream();
334                } catch (IOException e) {
335                        logger.warn("Couldn't open connection", e);
336                        return null;
337                }
338                return getFromPdbIds(is);
339        }
340
341        private static RCSBLigand makeLigand(Element ligandE) {
342                RCSBLigand ligand = new RCSBLigand();
343                ligand.setId(ligandE.getAttribute("chemicalID"));
344                ligand.setType(ligandE.getAttribute("type"));
345                ligand.setWeight(ReadUtils.toDouble(ligandE.getAttribute("molecularWeight")));
346                Element element = null;
347                NodeList data = ligandE.getChildNodes();
348                for (int i = 0; i < data.getLength(); i++) {
349                        if (data.item(i).getNodeType() != 1) continue;
350                        element = (Element) data.item(i);
351                        if (element.getNodeName().equals("chemicalName")) {
352                                ligand.setName(element.getTextContent());
353                        } else if (element.getNodeName().equals("formula")) {
354                                ligand.setFormula(element.getTextContent());
355                        } else if (element.getNodeName().equals("InChIKey")) {
356                                ligand.setInChIKey(element.getTextContent());
357                        } else if (element.getNodeName().equals("InChI")) {
358                                ligand.setInChI(element.getTextContent());
359                        } else if (element.getNodeName().equals("smiles")) {
360                                ligand.setSmiles(element.getTextContent());
361                        }
362                }
363                return ligand;
364        }
365
366}