001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.rcsb; 022 023import java.io.*; 024import java.net.URL; 025import java.net.URLConnection; 026import java.net.URLEncoder; 027import java.util.*; 028 029/** 030 * Utility classes for retrieving lists of PDB IDs. 031 * 032 * @author Andreas Prlic 033 * @since 4.2.0 034 */ 035public class PdbIdLists { 036 037 /** get the list of current PDB IDs 038 * 039 * @return list of current PDB IDs 040 * @throws IOException 041 */ 042 public static Set<String> getCurrentPDBIds() throws IOException { 043 String xml ="<orgPdbQuery>\n" + 044 " <version>head</version>\n" + 045 " <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" + 046 " <description>Holdings : All Structures</description>\n" + 047 " <experimentalMethod>ignore</experimentalMethod>\n" + 048 " <moleculeType>ignore</moleculeType>\n" + 049 " </orgPdbQuery>"; 050 051 return postQuery(xml); 052 } 053 054 055 /** Get the PDB IDs of all virus structures in the current PDB 056 * 057 * @return list of all virus structures 058 * @throws IOException 059 */ 060 public static Set<String> getAllViruses() throws IOException{ 061 String xml = "<orgPdbQuery>\n" + 062 " <version>head</version>\n" + 063 " <queryType>org.pdb.query.simple.EntriesOfEntitiesQuery</queryType>\n" + 064 " <description>Entries of :Oligomeric state Search : Min Number of oligomeric state=PAU\n" + 065 " and\n" + 066 " TaxonomyTree Search for Viruses\n" + 067 " </description>\n" + 068 " <parent><![CDATA[<orgPdbCompositeQuery version=\"1.0\">\n" + 069 " <queryRefinement>\n" + 070 " <queryRefinementLevel>0</queryRefinementLevel>\n" + 071 " <orgPdbQuery>\n" + 072 " <version>head</version>\n" + 073 " <queryType>org.pdb.query.simple.BiolUnitQuery</queryType>\n" + 074 " <description>Oligomeric state Search : Min Number of oligomeric state=PAU </description>\n" + 075 " <oligomeric_statemin>PAU</oligomeric_statemin>\n" + 076 " </orgPdbQuery>\n" + 077 " </queryRefinement>\n" + 078 " <queryRefinement>\n" + 079 " <queryRefinementLevel>1</queryRefinementLevel>\n" + 080 " <conjunctionType>and</conjunctionType>\n" + 081 " <orgPdbQuery>\n" + 082 " <version>head</version>\n" + 083 " <queryType>org.pdb.query.simple.TreeEntityQuery</queryType>\n" + 084 " <description>TaxonomyTree Search for Viruses</description>\n" + 085 " <t>1</t>\n" + 086 " <n>10239</n>\n" + 087 " <nodeDesc>Viruses</nodeDesc>\n" + 088 " </orgPdbQuery>\n" + 089 " </queryRefinement>\n" + 090 " </orgPdbCompositeQuery>]]></parent>\n" + 091 " </orgPdbQuery>"; 092 093 return postQuery(xml); 094 } 095 096 097 /** get list of all current NMR structures 098 * 099 * @return list of NMR structures 100 * @throws IOException 101 */ 102 public static Set<String> getNMRStructures() throws IOException{ 103 String xml = "<orgPdbCompositeQuery version=\"1.0\">\n" + 104 " <queryRefinement>\n" + 105 " <queryRefinementLevel>0</queryRefinementLevel>\n" + 106 " <orgPdbQuery>\n" + 107 " <version>head</version>\n" + 108 " <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" + 109 " <description>Holdings : All Structures</description>\n" + 110 " <experimentalMethod>ignore</experimentalMethod>\n" + 111 " <moleculeType>ignore</moleculeType>\n" + 112 " </orgPdbQuery>\n" + 113 " </queryRefinement>\n" + 114 " <queryRefinement>\n" + 115 " <queryRefinementLevel>1</queryRefinementLevel>\n" + 116 " <conjunctionType>and</conjunctionType>\n" + 117 " <orgPdbQuery>\n" + 118 " <version>head</version>\n" + 119 " <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>\n" + 120 " <description>Experimental Method is SOLUTION NMR</description>\n" + 121 " <mvStructure.expMethod.value>SOLUTION NMR</mvStructure.expMethod.value>\n" + 122 " <mvStructure.expMethod.exclusive>y</mvStructure.expMethod.exclusive>\n" + 123 " </orgPdbQuery>\n" + 124 " </queryRefinement>\n" + 125 "</orgPdbCompositeQuery>\n"; 126 127 128 return postQuery(xml); 129 } 130 131 132 /** get all PDB IDs of gag-polyproteins 133 * 134 * @return list of PDB IDs 135 * @throws IOException 136 */ 137 public static Set<String> getGagPolyproteins() throws IOException { 138 String xml = "<orgPdbCompositeQuery version=\"1.0\">\n" + 139 " <queryRefinement>\n" + 140 " <queryRefinementLevel>0</queryRefinementLevel>\n" + 141 " <orgPdbQuery>\n" + 142 " <version>head</version>\n" + 143 " <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" + 144 " <description>Holdings : All Structures</description>\n" + 145 " <experimentalMethod>ignore</experimentalMethod>\n" + 146 " <moleculeType>ignore</moleculeType>\n" + 147 " </orgPdbQuery>\n" + 148 " </queryRefinement>\n" + 149 " <queryRefinement>\n" + 150 " <queryRefinementLevel>1</queryRefinementLevel>\n" + 151 " <conjunctionType>and</conjunctionType>\n" + 152 " <orgPdbQuery>\n" + 153 " <version>head</version>\n" + 154 " <queryType>org.pdb.query.simple.MacroMoleculeQuery</queryType>\n" + 155 " <description>Molecule : Gag-Pol polyprotein [A1Z651, O12158, P03355, P03366, P03367, P03369, P04584, P04585, P04586, P04587, P04588, P05896, P05897, P05959, P05961, P0C6F2, P12497, P12499, P18042, P19505 ... ]</description>\n" + 156 " <macromoleculeName>A1Z651,O12158,P03355,P03366,P03367,P03369,P04584,P04585,P04586,P04587,P04588,P05896,P05897,P05959,P05961,P0C6F2,P12497,P12499,P18042,P19505,P19560,P20875,P24740,P35963,Q699E2,Q70XD7,Q72547,Q7SMT3,Q7SPG9,Q90VT5</macromoleculeName>\n" + 157 " </orgPdbQuery>\n" + 158 " </queryRefinement>\n" + 159 "</orgPdbCompositeQuery>"; 160 161 return postQuery(xml); 162 } 163 164 /** get all Transmembrane proteins 165 * 166 * @return list of PDB IDs 167 * @throws IOException 168 */ 169 public static Set<String> getTransmembraneProteins() throws IOException { 170 String xml = " <orgPdbQuery>\n" + 171 " <version>head</version>\n" + 172 " <queryType>org.pdb.query.simple.TreeQuery</queryType>\n" + 173 " <description>TransmembraneTree Search for root</description>\n" + 174 " <t>19</t>\n" + 175 " <n>0</n>\n" + 176 " <nodeDesc>root</nodeDesc>\n" + 177 " </orgPdbQuery>"; 178 179 return postQuery(xml); 180 } 181 182 public static Set<String> getNucleotides() throws IOException{ 183 String xml ="<orgPdbQuery>\n" + 184 " <version>head</version>\n" + 185 " <queryType>org.pdb.query.simple.ChainTypeQuery</queryType>\n" + 186 " <description>Chain Type: there is not any Protein chain</description>\n" + 187 " <containsProtein>N</containsProtein>\n" + 188 " <containsDna>?</containsDna>\n" + 189 " <containsRna>?</containsRna>\n" + 190 " <containsHybrid>?</containsHybrid>\n" + 191 " </orgPdbQuery>"; 192 return postQuery(xml); 193 } 194 195 public static Set<String>getRibosomes() throws IOException{ 196 String xml = "<orgPdbQuery>\n" + 197 " <version>head</version>\n" + 198 " <queryType>org.pdb.query.simple.StructureKeywordsQuery</queryType>\n" + 199 " <description>StructureKeywordsQuery: struct_keywords.pdbx_keywords.comparator=contains struct_keywords.pdbx_keywords.value=RIBOSOME </description>\n" + 200 " <struct_keywords.pdbx_keywords.comparator>contains</struct_keywords.pdbx_keywords.comparator>\n" + 201 " <struct_keywords.pdbx_keywords.value>RIBOSOME</struct_keywords.pdbx_keywords.value>\n" + 202 " </orgPdbQuery>"; 203 204 return postQuery(xml); 205 } 206 207 public static final String SERVICELOCATION="http://www.rcsb.org/pdb/rest/search"; 208 209 210 /** post am XML query (PDB XML query format) to the RESTful RCSB web service 211 * 212 * @param xml 213 * @return a list of PDB ids. 214 */ 215 public static Set<String> postQuery(String xml) 216 throws IOException{ 217 218 //System.out.println(xml); 219 220 221 URL u = new URL(SERVICELOCATION); 222 223 224 String encodedXML = URLEncoder.encode(xml,"UTF-8"); 225 226 227 InputStream in = doPOST(u,encodedXML); 228 229 Set<String> pdbIds = new TreeSet<String>(); 230 231 232 try (BufferedReader rd = new BufferedReader(new InputStreamReader(in))) { 233 234 String line; 235 while ((line = rd.readLine()) != null) { 236 237 pdbIds.add(line); 238 239 } 240 rd.close(); 241 } 242 243 244 return pdbIds; 245 246 247 248 } 249 250 /** do a POST to a URL and return the response stream for further processing elsewhere. 251 * 252 * 253 * @param url 254 * @return 255 * @throws IOException 256 */ 257 public static InputStream doPOST(URL url, String data) 258 259 throws IOException 260 { 261 262 // Send data 263 264 URLConnection conn = url.openConnection(); 265 266 conn.setDoOutput(true); 267 268 try(OutputStreamWriter wr = new OutputStreamWriter(conn.getOutputStream())) { 269 270 wr.write(data); 271 wr.flush(); 272 } 273 274 275 // Get the response 276 return conn.getInputStream(); 277 278 }; 279 280 public static void main(String[] args){ 281 try { 282 System.out.println("Current PDB status: " + getCurrentPDBIds().size()); 283 System.out.println("Virus structures: " + getAllViruses().size()); 284 System.out.println("NMR structures: " + getNMRStructures().size()); 285 System.out.println("Gag-polyproteins: " + getGagPolyproteins().size()); 286 System.out.println("Transmembrane proteins: " + getTransmembraneProteins().size()); 287 System.out.println("Nucleotide: " + getNucleotides().size()); 288 System.out.println("Ribosomes: " + getRibosomes().size()); 289 } catch ( Exception e){ 290 e.printStackTrace(); 291 } 292 } 293}