001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.symmetry.utils;
022
023import org.slf4j.Logger;
024import org.slf4j.LoggerFactory;
025
026import java.io.BufferedReader;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.io.Serializable;
031import java.net.URL;
032import java.util.*;
033
034
035public class BlastClustReader implements Serializable {
036
037        private static final long serialVersionUID = 1L;
038
039        private static final Logger logger = LoggerFactory.getLogger(BlastClustReader.class);
040
041        private int sequenceIdentity = 0;
042        private List<List<String>> clusters = new ArrayList<>();
043        // https://cdn.rcsb.org/resources/sequence/clusters/bc-95.out
044        private static final String coreUrl = "https://cdn.rcsb.org/resources/sequence/clusters/";
045
046        private static final List<Integer> seqIdentities = Arrays.asList(30, 40, 50, 70, 90, 95, 100);
047
048        public BlastClustReader(int sequenceIdentity)  {
049                this.sequenceIdentity = sequenceIdentity;
050        }
051
052        public List<List<String>> getPdbChainIdClusters() {
053                loadClusters(sequenceIdentity);
054                return clusters;
055        }
056
057        public Map<String,String> getRepresentatives(String pdbId) {
058                loadClusters(sequenceIdentity);
059                String pdbIdUc = pdbId.toUpperCase();
060
061                Map<String,String> representatives = new LinkedHashMap<>();
062                for (List<String> cluster: clusters) {
063                        // map fist match to representative
064                        for (String chainId: cluster) {
065                                if (chainId.startsWith(pdbIdUc)) {
066                                        representatives.put(chainId, cluster.get(0));
067                                        break;
068                                }
069                        }
070                }
071                return representatives;
072        }
073
074        public String getRepresentativeChain(String pdbId, String chainId) {
075                loadClusters(sequenceIdentity);
076
077                String pdbChainId = pdbId.toUpperCase() + "." + chainId;
078
079                for (List<String> cluster: clusters) {
080                        if (cluster.contains(pdbChainId)) {
081                                return cluster.get(0);
082                        }
083                }
084                return "";
085        }
086
087        public int indexOf(String pdbId, String chainId) {
088                loadClusters(sequenceIdentity);
089
090                String pdbChainId = pdbId.toUpperCase() + "." + chainId;
091
092                for (int i = 0; i < clusters.size(); i++) {
093                        List<String> cluster = clusters.get(i);
094                        if (cluster.contains(pdbChainId)) {
095                                return i;
096                        }
097                }
098                return -1;
099        }
100
101        public List<List<String>> getPdbChainIdClusters(String pdbId) {
102                loadClusters(sequenceIdentity);
103                String pdbIdUpper = pdbId.toUpperCase();
104
105                List<List<String>> matches = new ArrayList<List<String>>();
106                for (List<String> cluster: clusters) {
107                        for (String chainId: cluster) {
108                                if (chainId.startsWith(pdbIdUpper)) {
109                                        matches.add(cluster);
110                                        break;
111                                }
112                        }
113                }
114                return matches;
115        }
116
117        public List<List<String>> getChainIdsInEntry(String pdbId) {
118                loadClusters(sequenceIdentity);
119
120                List<List<String>> matches = new ArrayList<List<String>>();
121                List<String> match = null;
122
123                for (List<String> cluster: clusters) {
124                        for (String chainId: cluster) {
125                                if (chainId.startsWith(pdbId)) {
126                                        if (match == null) {
127                                                match = new ArrayList<String>();
128                                        }
129                                        match.add(chainId.substring(5));
130                                }
131                        }
132                        if (match != null) {
133                                Collections.sort(match);
134                                matches.add(match);
135                                match = null;
136                        }
137                }
138                return matches;
139        }
140
141        private void loadClusters(int sequenceIdentity) {
142                // load clusters only once
143                if (clusters.size() > 0) {
144                        return;
145                }
146
147                if (!seqIdentities.contains(sequenceIdentity)) {
148                        logger.error("Representative chains are not available for %sequence identity: {}", sequenceIdentity);
149                        return;
150                }
151
152                String urlString = coreUrl + "bc-" + sequenceIdentity + ".out";
153
154                try {
155
156                        URL u = new URL(urlString);
157                        InputStream stream = u.openStream();
158
159                        if (stream != null) {
160                                BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
161
162                                String line = null;
163                                while ((line = reader.readLine()) != null) {
164                                        line = line.replaceAll("_", ".");
165                                        List<String> cluster = Arrays.asList(line.split(" "));
166                                        clusters.add(cluster);
167                                }
168                                reader.close();
169                                stream.close();
170                        } else {
171                                throw new IOException("Got null stream for URL " + urlString);
172                        }
173                } catch (IOException e) {
174                        logger.error("Could not get sequence clusters from URL " + urlString + ". Error: " + e.getMessage());
175                }
176
177        }
178
179}
180