001/**
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
005 * should be distributed with the code. If you do not have a copy, see:
006 *
007 * http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
010 *
011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
012 * at:
013 *
014 * http://www.biojava.org/
015 *
016 * Created on 2012-11-20 Created by Douglas Myers-Turnbull
017 *
018 * @since 3.0.6
019 */
020package org.biojava.nbio.structure.rcsb;
021
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024import org.w3c.dom.Element;
025import org.w3c.dom.NodeList;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.net.URL;
030
031/**
032 * Fetches information from <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB's RESTful Web Service
033 * Interface</a>. A factory for {@link RCSBDescription RCSBDescriptions} from {@code describeMol} XML files. The factory
034 * methods will return null if the data was not found (rather than throwing an exception); client code should test for
035 * this. This is for consistency: if the factory could not read some part (corresponding to a field in a class in
036 * {@code rcsb.descriptions}) of the XML file, either because it was blank or contained an error that could not be
037 * safely ignored, that field will simply be null. This holds even for numerical values. On some parse errors, the error
038 * will additionally be printed to standard error.
039 *
040 * Example usage:
041 *
042 * <pre>
043 * RCSBDescription description = RCSBDescriptionFactory.get(&quot;1w0p&quot;);
044 * RCSBLigand firstLigand = ligands.getLigands().get(0);
045 * System.out.println(description.getPdbId()); // prints &quot;1w0p&quot;
046 * </pre>
047 *
048 * @see <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB RESTful</a>
049 *
050 *      TODO: Handle queries with more than 1 PDB Id.
051 *
052 * @author dmyerstu
053 * @since 3.0.6
054 */
055public class RCSBDescriptionFactory {
056
057        private static final Logger logger = LoggerFactory.getLogger(RCSBDescriptionFactory.class);
058
059        private static final String URL_STUB = "http://www.rcsb.org/pdb/rest/describeMol?structureId=";
060
061        /**
062         * @return An {@link RCSBDescription} from the XML file loaded as {@code stream}. Prefer calling
063         *         {@link #get(String)} if you want data directly from RCSB's RESTful service.
064         * @see RCSBDescriptionFactory#get(String)
065         */
066        public static RCSBDescription get(InputStream stream) {
067
068                NodeList data;
069                try {
070                        data = ReadUtils.getNodes(stream);
071                } catch (IOException e) {
072                        logger.warn("Couldn't parse XML", e);
073                        return null;
074                }
075
076                // first get the main info
077                RCSBDescription description = new RCSBDescription();
078                Element structureIdE = null;
079                for (int i = 0; i < data.getLength(); i++) {
080                        if (data.item(i).getNodeType() != 1) continue;
081                        structureIdE = (Element) data.item(i);
082                        if (structureIdE.getNodeName().equals("structureId")) {
083                                description.setPdbId(structureIdE.getAttribute("id"));
084                        }
085                }
086
087                // now get polymers
088                data = structureIdE.getChildNodes();
089                Element polymerE = null;
090                for (int i = 0; i < data.getLength(); i++) {
091                        if (data.item(i).getNodeType() != 1) continue;
092                        polymerE = (Element) data.item(i);
093                        if (polymerE.getNodeName().equals("polymer")) {
094                                RCSBPolymer polymer = makePolymer(polymerE);
095                                description.addPolymer(polymer);
096                        }
097                }
098
099                return description;
100
101        }
102
103        /**
104         * @return An {@link RCSBDescription} from the XML file at
105         *         {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory
106         *         method, unless a different URL or input source is required.
107         * @see RCSBDescriptionFactory#get(InputStream)
108         */
109        public static RCSBDescription get(String pdbId) {
110                InputStream is;
111                try {
112                        URL url = new URL(URL_STUB + pdbId);
113                        is = url.openConnection().getInputStream();
114                } catch (IOException e) {
115                        logger.warn("Couldn't open connection", e);
116                        return null;
117                }
118                return get(is);
119        }
120
121        private static RCSBMacromolecule makeMolecule(Element moleculeE) {
122                RCSBMacromolecule molecule = new RCSBMacromolecule();
123                molecule.setName(moleculeE.getAttribute("name"));
124                Element element = null;
125                NodeList data = moleculeE.getChildNodes();
126                for (int i = 0; i < data.getLength(); i++) {
127                        if (data.item(i).getNodeType() != 1) continue;
128                        element = (Element) data.item(i);
129                        if (element.getNodeName().equals("accession")) {
130                                molecule.addAccession(element.getAttribute("id"));
131                        }
132                }
133                return molecule;
134        }
135
136        private static RCSBPolymer makePolymer(Element polymerE) {
137
138                RCSBPolymer polymer = new RCSBPolymer();
139                polymer.setIndex(ReadUtils.toInt(polymerE.getAttribute("entityNr")));
140                polymer.setLength(ReadUtils.toInt(polymerE.getAttribute("length")));
141                polymer.setWeight(ReadUtils.toDouble(polymerE.getAttribute("weight")));
142                polymer.setType(ReadUtils.toStr(polymerE.getAttribute("type")));
143
144                Element element = null;
145                NodeList data = polymerE.getChildNodes();
146                for (int i = 0; i < data.getLength(); i++) {
147                        if (data.item(i).getNodeType() != 1) continue;
148                        element = (Element) data.item(i);
149                        if (element.getNodeName().equals("chain")) {
150                                parseChains(polymer, element.getAttribute("id"));
151                        } else if (element.getNodeName().equals("Taxonomy")) {
152                                String name = element.getAttribute("name");
153                                int id = ReadUtils.toInt(element.getAttribute("id"));
154                                RCSBTaxonomy taxonomy = new RCSBTaxonomy(name, id);
155                                polymer.setTaxonomy(taxonomy);
156                        } else if (element.getNodeName().equals("macroMolecule")) {
157                                RCSBMacromolecule molecule = makeMolecule(element);
158                                polymer.setMolecule(molecule);
159                        } else if (element.getNodeName().equals("polymerDescription")) {
160                                polymer.setDescription(element.getAttribute("description"));
161                        } else if (element.getNodeName().equals("enzClass")) {
162                                polymer.setEnzClass(element.getAttribute("ec"));
163                        } else if (element.getNodeName().equals("synonym")) {
164                                parseSynonyms(polymer, element.getAttribute("name"));
165                        }
166                }
167                return polymer;
168        }
169
170        private static void parseChains(RCSBPolymer polymer, String string) {
171                String[] parts = string.split("\\s*,\\s*");
172                for (String part : parts) {
173                        if (part.length() == 1) {
174                                polymer.addChain(part.charAt(0));
175                        } else {
176                                logger.warn("Chain id contained more than one character");
177                        }
178                }
179        }
180
181        private static void parseSynonyms(RCSBPolymer polymer, String string) {
182                String[] parts = string.split("\\s*,\\s*");
183                for (String part : parts) {
184                        polymer.addSynonym(part);
185                }
186        }
187
188}