001/** 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This 005 * should be distributed with the code. If you do not have a copy, see: 006 * 007 * http://www.gnu.org/copyleft/lesser.html 008 * 009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments. 010 * 011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page 012 * at: 013 * 014 * http://www.biojava.org/ 015 * 016 * Created on 2012-11-20 Created by Douglas Myers-Turnbull 017 * 018 * @since 3.0.6 019 */ 020package org.biojava.nbio.structure.rcsb; 021 022import org.slf4j.Logger; 023import org.slf4j.LoggerFactory; 024import org.w3c.dom.Element; 025import org.w3c.dom.NodeList; 026 027import java.io.IOException; 028import java.io.InputStream; 029import java.net.URL; 030 031/** 032 * Fetches information from <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB's RESTful Web Service 033 * Interface</a>. A factory for {@link RCSBDescription RCSBDescriptions} from {@code describeMol} XML files. The factory 034 * methods will return null if the data was not found (rather than throwing an exception); client code should test for 035 * this. This is for consistency: if the factory could not read some part (corresponding to a field in a class in 036 * {@code rcsb.descriptions}) of the XML file, either because it was blank or contained an error that could not be 037 * safely ignored, that field will simply be null. This holds even for numerical values. On some parse errors, the error 038 * will additionally be printed to standard error. 039 * 040 * Example usage: 041 * 042 * <pre> 043 * RCSBDescription description = RCSBDescriptionFactory.get("1w0p"); 044 * RCSBLigand firstLigand = ligands.getLigands().get(0); 045 * System.out.println(description.getPdbId()); // prints "1w0p" 046 * </pre> 047 * 048 * @see <a href="http://www.pdb.org/pdb/software/rest.do#descPDB">RCSB RESTful</a> 049 * 050 * TODO: Handle queries with more than 1 PDB Id. 051 * 052 * @author dmyerstu 053 * @since 3.0.6 054 */ 055public class RCSBDescriptionFactory { 056 057 private static final Logger logger = LoggerFactory.getLogger(RCSBDescriptionFactory.class); 058 059 private static final String URL_STUB = "http://www.rcsb.org/pdb/rest/describeMol?structureId="; 060 061 /** 062 * @return An {@link RCSBDescription} from the XML file loaded as {@code stream}. Prefer calling 063 * {@link #get(String)} if you want data directly from RCSB's RESTful service. 064 * @see RCSBDescriptionFactory#get(String) 065 */ 066 public static RCSBDescription get(InputStream stream) { 067 068 NodeList data; 069 try { 070 data = ReadUtils.getNodes(stream); 071 } catch (IOException e) { 072 logger.warn("Couldn't parse XML", e); 073 return null; 074 } 075 076 // first get the main info 077 RCSBDescription description = new RCSBDescription(); 078 Element structureIdE = null; 079 for (int i = 0; i < data.getLength(); i++) { 080 if (data.item(i).getNodeType() != 1) continue; 081 structureIdE = (Element) data.item(i); 082 if (structureIdE.getNodeName().equals("structureId")) { 083 description.setPdbId(structureIdE.getAttribute("id")); 084 } 085 } 086 087 // now get polymers 088 data = structureIdE.getChildNodes(); 089 Element polymerE = null; 090 for (int i = 0; i < data.getLength(); i++) { 091 if (data.item(i).getNodeType() != 1) continue; 092 polymerE = (Element) data.item(i); 093 if (polymerE.getNodeName().equals("polymer")) { 094 RCSBPolymer polymer = makePolymer(polymerE); 095 description.addPolymer(polymer); 096 } 097 } 098 099 return description; 100 101 } 102 103 /** 104 * @return An {@link RCSBDescription} from the XML file at 105 * {@code "http://www.pdb.org/pdb/rest/describeMol?structureId=pdbId"}. This is the preferred factory 106 * method, unless a different URL or input source is required. 107 * @see RCSBDescriptionFactory#get(InputStream) 108 */ 109 public static RCSBDescription get(String pdbId) { 110 InputStream is; 111 try { 112 URL url = new URL(URL_STUB + pdbId); 113 is = url.openConnection().getInputStream(); 114 } catch (IOException e) { 115 logger.warn("Couldn't open connection", e); 116 return null; 117 } 118 return get(is); 119 } 120 121 private static RCSBMacromolecule makeMolecule(Element moleculeE) { 122 RCSBMacromolecule molecule = new RCSBMacromolecule(); 123 molecule.setName(moleculeE.getAttribute("name")); 124 Element element = null; 125 NodeList data = moleculeE.getChildNodes(); 126 for (int i = 0; i < data.getLength(); i++) { 127 if (data.item(i).getNodeType() != 1) continue; 128 element = (Element) data.item(i); 129 if (element.getNodeName().equals("accession")) { 130 molecule.addAccession(element.getAttribute("id")); 131 } 132 } 133 return molecule; 134 } 135 136 private static RCSBPolymer makePolymer(Element polymerE) { 137 138 RCSBPolymer polymer = new RCSBPolymer(); 139 polymer.setIndex(ReadUtils.toInt(polymerE.getAttribute("entityNr"))); 140 polymer.setLength(ReadUtils.toInt(polymerE.getAttribute("length"))); 141 polymer.setWeight(ReadUtils.toDouble(polymerE.getAttribute("weight"))); 142 polymer.setType(ReadUtils.toStr(polymerE.getAttribute("type"))); 143 144 Element element = null; 145 NodeList data = polymerE.getChildNodes(); 146 for (int i = 0; i < data.getLength(); i++) { 147 if (data.item(i).getNodeType() != 1) continue; 148 element = (Element) data.item(i); 149 if (element.getNodeName().equals("chain")) { 150 parseChains(polymer, element.getAttribute("id")); 151 } else if (element.getNodeName().equals("Taxonomy")) { 152 String name = element.getAttribute("name"); 153 int id = ReadUtils.toInt(element.getAttribute("id")); 154 RCSBTaxonomy taxonomy = new RCSBTaxonomy(name, id); 155 polymer.setTaxonomy(taxonomy); 156 } else if (element.getNodeName().equals("macroMolecule")) { 157 RCSBMacromolecule molecule = makeMolecule(element); 158 polymer.setMolecule(molecule); 159 } else if (element.getNodeName().equals("polymerDescription")) { 160 polymer.setDescription(element.getAttribute("description")); 161 } else if (element.getNodeName().equals("enzClass")) { 162 polymer.setEnzClass(element.getAttribute("ec")); 163 } else if (element.getNodeName().equals("synonym")) { 164 parseSynonyms(polymer, element.getAttribute("name")); 165 } 166 } 167 return polymer; 168 } 169 170 private static void parseChains(RCSBPolymer polymer, String string) { 171 String[] parts = string.split("\\s*,\\s*"); 172 for (String part : parts) { 173 if (part.length() == 1) { 174 polymer.addChain(part.charAt(0)); 175 } else { 176 logger.warn("Chain id contained more than one character"); 177 } 178 } 179 } 180 181 private static void parseSynonyms(RCSBPolymer polymer, String string) { 182 String[] parts = string.split("\\s*,\\s*"); 183 for (String part : parts) { 184 polymer.addSynonym(part); 185 } 186 } 187 188}