001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.rcsb;
022
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.*;
028
/**
 * Utility class for retrieving lists of PDB IDs.
 *
 * <p>Every {@code get...} method builds a fixed query in the PDB XML query
 * format, sends it through {@link #postQuery(String)} to
 * {@link #SERVICELOCATION} and returns the identifiers found in the response.
 *
 * <p>NOTE(review): {@link #SERVICELOCATION} points at a plain-HTTP REST
 * endpoint; confirm that this service is still offered before relying on
 * this class.
 *
 * @author  Andreas Prlic
 * @since 4.2.0
 */
public class PdbIdLists {

        /** Address of the RESTful search service that all queries are posted to. */
        public static final String SERVICELOCATION="http://www.rcsb.org/pdb/rest/search";

        /**
         * Get the list of current PDB IDs.
         *
         * @return sorted set of the identifiers returned for the "all holdings" query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getCurrentPDBIds() throws IOException {
                String xml = "<orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" +
                                "    <description>Holdings : All Structures</description>\n" +
                                "    <experimentalMethod>ignore</experimentalMethod>\n" +
                                "    <moleculeType>ignore</moleculeType>\n" +
                                "  </orgPdbQuery>";

                return postQuery(xml);
        }

        /**
         * Get the PDB IDs of all virus structures in the current PDB.
         *
         * <p>The query combines an oligomeric-state search ({@code PAU}) with a
         * taxonomy-tree search for the node described as "Viruses" (10239).
         *
         * @return sorted set of the identifiers returned for the virus query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getAllViruses() throws IOException {
                String xml = "<orgPdbQuery>\n" +
                                "        <version>head</version>\n" +
                                "        <queryType>org.pdb.query.simple.EntriesOfEntitiesQuery</queryType>\n" +
                                "        <description>Entries of :Oligomeric state Search : Min Number of oligomeric state=PAU\n" +
                                "        and\n" +
                                "        TaxonomyTree Search for Viruses\n" +
                                "                </description>\n" +
                                "        <parent><![CDATA[<orgPdbCompositeQuery version=\"1.0\">\n" +
                                "        <queryRefinement>\n" +
                                "        <queryRefinementLevel>0</queryRefinementLevel>\n" +
                                "        <orgPdbQuery>\n" +
                                "        <version>head</version>\n" +
                                "        <queryType>org.pdb.query.simple.BiolUnitQuery</queryType>\n" +
                                "        <description>Oligomeric state Search : Min Number of oligomeric state=PAU </description>\n" +
                                "        <oligomeric_statemin>PAU</oligomeric_statemin>\n" +
                                "        </orgPdbQuery>\n" +
                                "        </queryRefinement>\n" +
                                "        <queryRefinement>\n" +
                                "        <queryRefinementLevel>1</queryRefinementLevel>\n" +
                                "        <conjunctionType>and</conjunctionType>\n" +
                                "        <orgPdbQuery>\n" +
                                "        <version>head</version>\n" +
                                "        <queryType>org.pdb.query.simple.TreeEntityQuery</queryType>\n" +
                                "        <description>TaxonomyTree Search for Viruses</description>\n" +
                                "        <t>1</t>\n" +
                                "        <n>10239</n>\n" +
                                "        <nodeDesc>Viruses</nodeDesc>\n" +
                                "        </orgPdbQuery>\n" +
                                "        </queryRefinement>\n" +
                                "        </orgPdbCompositeQuery>]]></parent>\n" +
                                "        </orgPdbQuery>";

                return postQuery(xml);
        }

        /**
         * Get the list of all current NMR structures.
         *
         * <p>The query intersects "all holdings" with an experimental-method
         * filter whose value is {@code SOLUTION NMR} (marked exclusive).
         *
         * @return sorted set of the identifiers returned for the NMR query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getNMRStructures() throws IOException {
                String xml = "<orgPdbCompositeQuery version=\"1.0\">\n" +
                                " <queryRefinement>\n" +
                                "  <queryRefinementLevel>0</queryRefinementLevel>\n" +
                                "  <orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" +
                                "    <description>Holdings : All Structures</description>\n" +
                                "    <experimentalMethod>ignore</experimentalMethod>\n" +
                                "    <moleculeType>ignore</moleculeType>\n" +
                                "  </orgPdbQuery>\n" +
                                " </queryRefinement>\n" +
                                " <queryRefinement>\n" +
                                "  <queryRefinementLevel>1</queryRefinementLevel>\n" +
                                "  <conjunctionType>and</conjunctionType>\n" +
                                "  <orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.ExpTypeQuery</queryType>\n" +
                                "    <description>Experimental Method is SOLUTION NMR</description>\n" +
                                "    <mvStructure.expMethod.value>SOLUTION NMR</mvStructure.expMethod.value>\n" +
                                "    <mvStructure.expMethod.exclusive>y</mvStructure.expMethod.exclusive>\n" +
                                "  </orgPdbQuery>\n" +
                                " </queryRefinement>\n" +
                                "</orgPdbCompositeQuery>\n";

                return postQuery(xml);
        }

        /**
         * Get all PDB IDs of gag-polyproteins.
         *
         * <p>The query intersects "all holdings" with a macromolecule-name
         * search over a fixed, hard-coded list of accession codes.
         *
         * @return sorted set of the identifiers returned for the Gag-Pol query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getGagPolyproteins() throws IOException {
                String xml = "<orgPdbCompositeQuery version=\"1.0\">\n" +
                                " <queryRefinement>\n" +
                                "  <queryRefinementLevel>0</queryRefinementLevel>\n" +
                                "  <orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.HoldingsQuery</queryType>\n" +
                                "    <description>Holdings : All Structures</description>\n" +
                                "    <experimentalMethod>ignore</experimentalMethod>\n" +
                                "    <moleculeType>ignore</moleculeType>\n" +
                                "  </orgPdbQuery>\n" +
                                " </queryRefinement>\n" +
                                " <queryRefinement>\n" +
                                "  <queryRefinementLevel>1</queryRefinementLevel>\n" +
                                "  <conjunctionType>and</conjunctionType>\n" +
                                "  <orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.MacroMoleculeQuery</queryType>\n" +
                                "    <description>Molecule : Gag-Pol polyprotein [A1Z651, O12158, P03355, P03366, P03367, P03369, P04584, P04585, P04586, P04587, P04588, P05896, P05897, P05959, P05961, P0C6F2, P12497, P12499, P18042, P19505 ... ]</description>\n" +
                                "    <macromoleculeName>A1Z651,O12158,P03355,P03366,P03367,P03369,P04584,P04585,P04586,P04587,P04588,P05896,P05897,P05959,P05961,P0C6F2,P12497,P12499,P18042,P19505,P19560,P20875,P24740,P35963,Q699E2,Q70XD7,Q72547,Q7SMT3,Q7SPG9,Q90VT5</macromoleculeName>\n" +
                                "  </orgPdbQuery>\n" +
                                " </queryRefinement>\n" +
                                "</orgPdbCompositeQuery>";

                return postQuery(xml);
        }

        /**
         * Get all transmembrane proteins.
         *
         * <p>The query is a tree search starting at the node described as
         * "root" of the tree the query calls "TransmembraneTree".
         *
         * @return sorted set of the identifiers returned for the transmembrane query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getTransmembraneProteins() throws IOException {
                String xml = "  <orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.TreeQuery</queryType>\n" +
                                "    <description>TransmembraneTree Search for root</description>\n" +
                                "    <t>19</t>\n" +
                                "    <n>0</n>\n" +
                                "    <nodeDesc>root</nodeDesc>\n" +
                                "  </orgPdbQuery>";

                return postQuery(xml);
        }

        /**
         * Get the PDB IDs matched by a chain-type query that excludes protein
         * chains ({@code containsProtein=N}) and leaves the DNA, RNA and hybrid
         * criteria as {@code ?}.
         *
         * @return sorted set of the identifiers returned for the chain-type query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getNucleotides() throws IOException {
                String xml = "<orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.ChainTypeQuery</queryType>\n" +
                                "    <description>Chain Type: there is not any Protein chain</description>\n" +
                                "    <containsProtein>N</containsProtein>\n" +
                                "    <containsDna>?</containsDna>\n" +
                                "    <containsRna>?</containsRna>\n" +
                                "    <containsHybrid>?</containsHybrid>\n" +
                                "  </orgPdbQuery>";

                return postQuery(xml);
        }

        /**
         * Get the PDB IDs matched by a structure-keywords query whose keyword
         * value {@code RIBOSOME} is compared with {@code contains}.
         *
         * @return sorted set of the identifiers returned for the ribosome query
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> getRibosomes() throws IOException {
                String xml = "<orgPdbQuery>\n" +
                                "    <version>head</version>\n" +
                                "    <queryType>org.pdb.query.simple.StructureKeywordsQuery</queryType>\n" +
                                "    <description>StructureKeywordsQuery: struct_keywords.pdbx_keywords.comparator=contains struct_keywords.pdbx_keywords.value=RIBOSOME </description>\n" +
                                "    <struct_keywords.pdbx_keywords.comparator>contains</struct_keywords.pdbx_keywords.comparator>\n" +
                                "    <struct_keywords.pdbx_keywords.value>RIBOSOME</struct_keywords.pdbx_keywords.value>\n" +
                                "  </orgPdbQuery>";

                return postQuery(xml);
        }

        /**
         * Post an XML query (PDB XML query format) to the RESTful RCSB web
         * service at {@link #SERVICELOCATION}.
         *
         * <p>The XML is URL-encoded as UTF-8 and sent as the request body. The
         * response is decoded as UTF-8 and read line by line; each non-blank
         * line, with surrounding whitespace removed, becomes one entry of the
         * result.
         *
         * @param xml the query to send; must not be {@code null}
         * @return a sorted set of the identifiers in the response; empty if the
         *         response contains no non-blank line
         * @throws IOException if the request cannot be sent or the response cannot be read
         */
        public static Set<String> postQuery(String xml)
                        throws IOException {

                URL serviceUrl = new URL(SERVICELOCATION);
                String encodedXML = URLEncoder.encode(xml, StandardCharsets.UTF_8.name());

                Set<String> pdbIds = new TreeSet<>();

                // Both the raw stream and the reader are managed resources, so the
                // connection's stream is released even if reading fails midway.
                try (InputStream in = doPOST(serviceUrl, encodedXML);
                                BufferedReader rd = new BufferedReader(
                                                new InputStreamReader(in, StandardCharsets.UTF_8))) {

                        String line;
                        while ((line = rd.readLine()) != null) {
                                // A blank or padded line is never a valid identifier.
                                String id = line.trim();
                                if (!id.isEmpty()) {
                                        pdbIds.add(id);
                                }
                        }
                }

                return pdbIds;
        }

        /**
         * Do a POST to a URL and return the response stream for further
         * processing elsewhere.
         *
         * <p>The request body is written as UTF-8, independent of the platform
         * default charset.
         *
         * @param url the address to post to
         * @param data the request body, written as is (callers are responsible
         *             for any encoding the receiving service expects)
         * @return the response stream of the connection; the caller must close it
         * @throws IOException if the connection cannot be opened, the body cannot
         *                     be written, or the response stream cannot be obtained
         */
        public static InputStream doPOST(URL url, String data)
                        throws IOException {

                URLConnection conn = url.openConnection();

                // Enables sending a request body on this connection.
                conn.setDoOutput(true);

                try (OutputStreamWriter wr = new OutputStreamWriter(
                                conn.getOutputStream(), StandardCharsets.UTF_8)) {
                        wr.write(data);
                        wr.flush();
                }

                // Get the response
                return conn.getInputStream();
        }

        /**
         * Runs every predefined query once and prints the number of results of
         * each to standard output. Any failure is printed as a stack trace.
         *
         * @param args ignored
         */
        public static void main(String[] args){
                try {
                        System.out.println("Current PDB status: " + getCurrentPDBIds().size());
                        System.out.println("Virus structures: " + getAllViruses().size());
                        System.out.println("NMR structures: " + getNMRStructures().size());
                        System.out.println("Gag-polyproteins: " + getGagPolyproteins().size());
                        System.out.println("Transmembrane proteins: " + getTransmembraneProteins().size());
                        System.out.println("Nucleotide: " + getNucleotides().size());
                        System.out.println("Ribosomes: " + getRibosomes().size());
                } catch ( Exception e){
                        e.printStackTrace();
                }
        }
}