/**
 * BioJava development code
 *
 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
 * should be distributed with the code. If you do not have a copy, see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 * Created on 2013-06-13 Created by Douglas Myers-Turnbull
 *
 * @since 3.0.6
 */
package org.biojava.nbio.structure.rcsb;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Fetches information from <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB's RESTful Web Service
 * Interface</a>. A factory for {@link RCSBLigands RCSBLigands} and {@link RCSBLigand RCSBLigand} objects built from
 * {@code ligandInfo} XML documents.
 * <p>
 * The factory methods return {@code null} if the data could not be fetched or parsed (rather than throwing an
 * exception); client code should test for this. The failure is additionally logged as a warning. A document that
 * parses correctly but contains no {@code ligandInfo} element yields an empty result instead of {@code null}.
 * <p>
 * Example usage:
 *
 * <pre>
 * RCSBLigands ligands = RCSBLigandsFactory.getFromPdbId("1w0p");
 * List&lt;RCSBLigand&gt; list = ligands.getLigands();
 * System.out.println(list.get(0).getFormula()); // prints "CA 2"
 * System.out.println(list.get(1).getFormula()); // prints "C11 H19 N O9"
 * </pre>
 *
 * @see <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB RESTful</a>
 *
 * @author dmyerstu
 * @since 3.0.6
 */
public class RCSBLigandsFactory {

	private static final String HET_URL_STUB = "http://www.rcsb.org/pdb/rest/describeHet?chemicalID=";

	private static final Logger logger = Logger.getLogger(RCSBLigandsFactory.class.getPackage().getName());

	private static final String PDB_URL_STUB = "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=";

	/**
	 * Reads the first ligand described by a {@code describeHet} XML document.
	 *
	 * @param stream the XML document; it is read but not closed by this method
	 * @return the first {@link RCSBLigand} in the document, or {@code null} if the document could not be parsed or
	 *         contains no ligand. Prefer calling {@link #getFromHeteroAtomId(String)} if you want data directly
	 *         from RCSB's RESTful service.
	 */
	public static RCSBLigand getFromHeteroAtomId(InputStream stream) {
		return firstOrNull(getFromHeteroAtomIds(stream));
	}

	/**
	 * Fetches the ligand with the given chemical ID from
	 * {@code "http://www.rcsb.org/pdb/rest/describeHet?chemicalID=heteroAtomId"}. This is the preferred factory
	 * method, unless a different URL or input source is required.
	 *
	 * @param heteroAtomId the chemical (hetero atom) ID to look up
	 * @return the matching {@link RCSBLigand}, or {@code null} if the service could not be reached, the response could
	 *         not be parsed, or it contains no ligand
	 */
	public static RCSBLigand getFromHeteroAtomId(String heteroAtomId) {
		return firstOrNull(getFromHeteroAtomIds(heteroAtomId));
	}

	/**
	 * Reads every ligand described by a {@code describeHet} XML document.
	 *
	 * @param stream the XML document; it is read but not closed by this method
	 * @return the {@link RCSBLigand RCSBLigands} found (empty if the document has no {@code ligandInfo} element), or
	 *         {@code null} if the document could not be parsed. Prefer calling
	 *         {@link #getFromHeteroAtomIds(String...)} if you want data directly from RCSB's RESTful service.
	 */
	public static List<RCSBLigand> getFromHeteroAtomIds(InputStream stream) {
		NodeList topLevel = readNodes(stream);
		if (topLevel == null) {
			return null;
		}
		List<RCSBLigand> ligands = new ArrayList<RCSBLigand>();
		Element ligandInfo = findChildElement(topLevel, "ligandInfo");
		if (ligandInfo != null) {
			for (Element ligandE : childElements(ligandInfo.getChildNodes(), "ligand")) {
				ligands.add(makeLigand(ligandE));
			}
		}
		return ligands;
	}

	/**
	 * Fetches the ligands with the given chemical IDs in a single request to
	 * {@code "http://www.rcsb.org/pdb/rest/describeHet?chemicalID=id1,id2,..."}.
	 *
	 * @param heteroAtomIds the chemical (hetero atom) IDs to look up
	 * @return the {@link RCSBLigand RCSBLigands} found, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static List<RCSBLigand> getFromHeteroAtomIds(List<String> heteroAtomIds) {
		return getFromHeteroAtomIds(heteroAtomIds.toArray(new String[heteroAtomIds.size()]));
	}

	/**
	 * Fetches the ligands with the given chemical IDs in a single request to
	 * {@code "http://www.rcsb.org/pdb/rest/describeHet?chemicalID=id1,id2,..."}. This is the preferred factory
	 * method, unless a different URL or input source is required.
	 *
	 * @param heteroAtomIds the chemical (hetero atom) IDs to look up
	 * @return the {@link RCSBLigand RCSBLigands} found, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static List<RCSBLigand> getFromHeteroAtomIds(String... heteroAtomIds) {
		InputStream is = null;
		try {
			is = openStream(HET_URL_STUB + joinIds(heteroAtomIds));
			return getFromHeteroAtomIds(is);
		} catch (IOException e) {
			logger.log(Level.WARNING,"Couldn't open connection", e);
			return null;
		} finally {
			// The stream was opened here, so it is released here.
			closeQuietly(is);
		}
	}

	/**
	 * Reads the ligands of a single structure from a {@code ligandInfo} XML document.
	 *
	 * @param stream the XML document; it is read but not closed by this method
	 * @return an {@link RCSBLigands} holding the ligands found (empty, with no PDB ID set, if the document has no
	 *         {@code ligandInfo} element), or {@code null} if the document could not be parsed. Prefer calling
	 *         {@link #getFromPdbId(String)} if you want data directly from RCSB's RESTful service.
	 */
	public static RCSBLigands getFromPdbId(InputStream stream) {
		NodeList topLevel = readNodes(stream);
		if (topLevel == null) {
			return null;
		}
		return readLigands(findChildElement(topLevel, "ligandInfo"));
	}

	/**
	 * Fetches the ligands of one structure from
	 * {@code "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=pdbId"}. This is the preferred factory method,
	 * unless a different URL or input source is required.
	 *
	 * @param pdbId the PDB ID of the structure
	 * @return an {@link RCSBLigands} for the structure, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static RCSBLigands getFromPdbId(String pdbId) {
		InputStream is = null;
		try {
			is = openStream(PDB_URL_STUB + pdbId);
			return getFromPdbId(is);
		} catch (IOException e) {
			logger.log(Level.WARNING,"Couldn't open connection", e);
			return null;
		} finally {
			closeQuietly(is);
		}
	}

	/**
	 * Reads the ligands of one or more structures from a {@code ligandInfo} XML document.
	 * <p>
	 * A multi-structure document wraps one {@code structureId} element per structure; each of them produces one
	 * entry in the result. A single-structure document carries its {@code ligandInfo} element directly at the top
	 * level (the same shape {@link #getFromPdbId(InputStream)} reads) and produces a one-element list.
	 *
	 * @param stream the XML document; it is read but not closed by this method
	 * @return one {@link RCSBLigands} per structure found, or {@code null} if the document could not be parsed. Prefer
	 *         calling {@link #getFromPdbIds(String...)} if you want data directly from RCSB's RESTful service.
	 */
	public static List<RCSBLigands> getFromPdbIds(InputStream stream) {
		NodeList topLevel = readNodes(stream);
		if (topLevel == null) {
			return null;
		}

		List<RCSBLigands> ligandsList = new ArrayList<RCSBLigands>();
		for (int i = 0; i < topLevel.getLength(); i++) {
			Node node = topLevel.item(i);
			if (node.getNodeType() != Node.ELEMENT_NODE) {
				continue;
			}
			String name = node.getNodeName();
			if (name.equals("structureId")) {
				// Multi-structure response: the ligandInfo is nested inside each structureId.
				ligandsList.add(readLigands(findChildElement(node.getChildNodes(), "ligandInfo")));
			} else if (name.equals("ligandInfo")) {
				// Single-structure response: there is no wrapper element around the ligandInfo.
				ligandsList.add(readLigands((Element) node));
			}
		}
		return ligandsList;
	}

	/**
	 * Fetches the ligands of several structures in a single request to
	 * {@code "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=id1,id2,..."}.
	 *
	 * @param pdbIds the PDB IDs of the structures
	 * @return one {@link RCSBLigands} per structure found, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static List<RCSBLigands> getFromPdbIds(List<String> pdbIds) {
		return getFromPdbIds(pdbIds.toArray(new String[pdbIds.size()]));
	}

	/**
	 * Fetches the ligands of one structure. Equivalent to {@link #getFromPdbId(String)}; this overload exists so that
	 * a call with exactly one ID returns a single {@link RCSBLigands} rather than a list.
	 *
	 * @param pdbId the PDB ID of the structure
	 * @return an {@link RCSBLigands} for the structure, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static RCSBLigands getFromPdbIds(String pdbId) {
		return getFromPdbId(pdbId);
	}

	/**
	 * Fetches the ligands of several structures in a single request to
	 * {@code "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=id1,id2,..."}. This is the preferred factory
	 * method for multiple structures, unless a different URL or input source is required.
	 *
	 * @param pdbIds the PDB IDs of the structures
	 * @return one {@link RCSBLigands} per structure found, or {@code null} if the service could not be reached or the
	 *         response could not be parsed
	 */
	public static List<RCSBLigands> getFromPdbIds(String... pdbIds) {
		InputStream is = null;
		try {
			is = openStream(PDB_URL_STUB + joinIds(pdbIds));
			return getFromPdbIds(is);
		} catch (IOException e) {
			logger.log(Level.WARNING,"Couldn't open connection", e);
			return null;
		} finally {
			closeQuietly(is);
		}
	}

	/** Builds an {@link RCSBLigand} from the attributes and child elements of a {@code ligand} element. */
	private static RCSBLigand makeLigand(Element ligandE) {
		RCSBLigand ligand = new RCSBLigand();
		ligand.setId(ligandE.getAttribute("chemicalID"));
		ligand.setType(ligandE.getAttribute("type"));
		ligand.setWeight(ReadUtils.toDouble(ligandE.getAttribute("molecularWeight")));
		NodeList data = ligandE.getChildNodes();
		for (int i = 0; i < data.getLength(); i++) {
			Node node = data.item(i);
			if (node.getNodeType() != Node.ELEMENT_NODE) {
				continue;
			}
			String name = node.getNodeName();
			String text = node.getTextContent();
			if (name.equals("chemicalName")) {
				ligand.setName(text);
			} else if (name.equals("formula")) {
				ligand.setFormula(text);
			} else if (name.equals("InChIKey")) {
				ligand.setInChIKey(text);
			} else if (name.equals("InChI")) {
				ligand.setInChI(text);
			} else if (name.equals("smiles")) {
				ligand.setSmiles(text);
			}
		}
		return ligand;
	}

	/**
	 * Builds an {@link RCSBLigands} from the {@code ligand} children of a {@code ligandInfo} element. The PDB ID is
	 * taken from the {@code structureId} attribute of the first ligand. A {@code null} element yields an empty result.
	 */
	private static RCSBLigands readLigands(Element ligandInfo) {
		RCSBLigands ligands = new RCSBLigands();
		if (ligandInfo == null) {
			return ligands;
		}
		for (Element ligandE : childElements(ligandInfo.getChildNodes(), "ligand")) {
			if (ligands.getPdbId() == null) {
				ligands.setPdbId(ligandE.getAttribute("structureId"));
			}
			ligands.addLigand(makeLigand(ligandE));
		}
		return ligands;
	}

	/** Parses the stream via {@code ReadUtils}; logs a warning and returns {@code null} on an I/O failure. */
	private static NodeList readNodes(InputStream stream) {
		try {
			return ReadUtils.getNodes(stream);
		} catch (IOException e) {
			logger.log(Level.WARNING,"Couldn't parse XML", e);
			return null;
		}
	}

	/** Returns the first element node in {@code nodes} with the given name, or {@code null} if there is none. */
	private static Element findChildElement(NodeList nodes, String name) {
		for (int i = 0; i < nodes.getLength(); i++) {
			Node node = nodes.item(i);
			if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(name)) {
				return (Element) node;
			}
		}
		return null;
	}

	/** Returns, in document order, every element node in {@code nodes} with the given name. */
	private static List<Element> childElements(NodeList nodes, String name) {
		List<Element> matches = new ArrayList<Element>();
		for (int i = 0; i < nodes.getLength(); i++) {
			Node node = nodes.item(i);
			if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals(name)) {
				matches.add((Element) node);
			}
		}
		return matches;
	}

	/** Opens a connection to {@code url} and returns its response stream; the caller must close it. */
	private static InputStream openStream(String url) throws IOException {
		return new URL(url).openConnection().getInputStream();
	}

	/** Closes {@code stream} if it is non-null; a failure to close is deliberately ignored. */
	private static void closeQuietly(InputStream stream) {
		if (stream == null) {
			return;
		}
		try {
			stream.close();
		} catch (IOException ignored) {
			// The data has already been read (or the read already failed and was logged); nothing useful to do.
		}
	}

	/** Joins the IDs with commas, the separator used for multi-ID requests. */
	private static String joinIds(String[] ids) {
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < ids.length; i++) {
			if (i > 0) {
				sb.append(",");
			}
			sb.append(ids[i]);
		}
		return sb.toString();
	}

	/** Returns the first element of {@code list}, or {@code null} if the list is {@code null} or empty. */
	private static <T> T firstOrNull(List<T> list) {
		return (list == null || list.isEmpty()) ? null : list.get(0);
	}

}