001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.rcsb;
022
023import org.biojava.nbio.structure.align.client.JFatCatClient;
024import org.biojava.nbio.structure.align.client.StructureName;
025import org.biojava.nbio.structure.align.util.URLConnectionTools;
026import org.biojava.nbio.structure.align.xml.RepresentativeXMLConverter;
027
028import java.io.BufferedReader;
029import java.io.InputStream;
030import java.io.InputStreamReader;
031import java.net.URL;
032import java.util.Arrays;
033import java.util.List;
034import java.util.SortedSet;
035import java.util.TreeSet;
036
037/**
038 * TODO Move this to {@link Representatives}.
039 */
040public class GetRepresentatives {
041
042        private static String clusterUrl = "http://www.rcsb.org/pdb/rest/representatives?cluster=";
043        private static String allUrl = "http://www.rcsb.org/pdb/rest/getCurrent/";
044
045        // available sequence clusters
046        private static List<Integer> seqIdentities = Arrays.asList(30, 40, 50, 70, 90, 95, 100);
047
048
049        /**
050         * Returns a representative set of PDB protein chains at the specified sequence
051         * identity cutoff. See http://www.pdb.org/pdb/statistics/clusterStatistics.do
052         * for more information.
053         * @param sequenceIdentity sequence identity threshold
054         * @return PdbChainKey set of representatives
055         */
056        public static SortedSet<StructureName> getRepresentatives(int sequenceIdentity) {
057                SortedSet<StructureName> representatives = new TreeSet<StructureName>();
058
059                if (!seqIdentities.contains(sequenceIdentity)) {
060                        System.err.println("Error: representative chains are not available for %sequence identity: "
061                                                        + sequenceIdentity);
062                        return representatives;
063                }
064
065
066                try {
067
068                        URL u = new URL(clusterUrl + sequenceIdentity);
069
070                        InputStream stream = URLConnectionTools.getInputStream(u, 60000);
071
072                        String xml = null;
073
074                        if (stream != null) {
075                                xml = JFatCatClient.convertStreamToString(stream);
076
077                                SortedSet<String> reps = RepresentativeXMLConverter.fromXML(xml);
078
079                                for (String s : reps) {
080                                        StructureName k = new StructureName(s);
081                                        representatives.add(k);
082                                }
083
084                        }
085
086                } catch (Exception e) {
087                        e.printStackTrace();
088                }
089
090                return representatives;
091        }
092
093        /**
094         * Returns the current list of all PDB IDs.
095         * @return PdbChainKey set of all PDB IDs.
096         */
097        public static SortedSet<String> getAll() {
098                SortedSet<String> representatives = new TreeSet<String>();
099
100                try {
101
102                        URL u = new URL(allUrl);
103
104                        InputStream stream = URLConnectionTools.getInputStream(u, 60000);
105
106                        if (stream != null) {
107                                BufferedReader reader = new BufferedReader(
108                                                new InputStreamReader(stream));
109
110                                String line = null;
111
112                                while ((line = reader.readLine()) != null) {
113                                        int index = line.lastIndexOf("structureId=");
114                                        if (index > 0) {
115                                                representatives.add(line.substring(index + 13, index + 17));
116                                        }
117                                }
118                        }
119
120                } catch (Exception e) {
121                        e.printStackTrace();
122                }
123
124                return representatives;
125        }
126}