001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.utils; 022 023import org.slf4j.Logger; 024import org.slf4j.LoggerFactory; 025 026import java.io.BufferedReader; 027import java.io.IOException; 028import java.io.InputStream; 029import java.io.InputStreamReader; 030import java.io.Serializable; 031import java.net.URL; 032import java.util.*; 033 034 035public class BlastClustReader implements Serializable { 036 037 private static final long serialVersionUID = 1L; 038 039 private static final Logger logger = LoggerFactory.getLogger(BlastClustReader.class); 040 041 private int sequenceIdentity = 0; 042 private List<List<String>> clusters = new ArrayList<>(); 043 // https://cdn.rcsb.org/resources/sequence/clusters/bc-95.out 044 private static final String coreUrl = "https://cdn.rcsb.org/resources/sequence/clusters/"; 045 046 private static final List<Integer> seqIdentities = Arrays.asList(30, 40, 50, 70, 90, 95, 100); 047 048 public BlastClustReader(int sequenceIdentity) { 049 this.sequenceIdentity = sequenceIdentity; 050 } 051 052 public List<List<String>> getPdbChainIdClusters() { 053 loadClusters(sequenceIdentity); 054 return clusters; 055 } 056 057 public Map<String,String> getRepresentatives(String pdbId) { 058 loadClusters(sequenceIdentity); 059 String pdbIdUc = pdbId.toUpperCase(); 060 061 Map<String,String> representatives = new LinkedHashMap<>(); 062 for (List<String> cluster: clusters) { 063 // map fist match to representative 064 for (String chainId: cluster) { 065 if (chainId.startsWith(pdbIdUc)) { 066 representatives.put(chainId, cluster.get(0)); 067 break; 068 } 069 } 070 } 071 return representatives; 072 } 073 074 public String getRepresentativeChain(String pdbId, String chainId) { 075 loadClusters(sequenceIdentity); 076 077 String pdbChainId = pdbId.toUpperCase() + "." + chainId; 078 079 for (List<String> cluster: clusters) { 080 if (cluster.contains(pdbChainId)) { 081 return cluster.get(0); 082 } 083 } 084 return ""; 085 } 086 087 public int indexOf(String pdbId, String chainId) { 088 loadClusters(sequenceIdentity); 089 090 String pdbChainId = pdbId.toUpperCase() + "." + chainId; 091 092 for (int i = 0; i < clusters.size(); i++) { 093 List<String> cluster = clusters.get(i); 094 if (cluster.contains(pdbChainId)) { 095 return i; 096 } 097 } 098 return -1; 099 } 100 101 public List<List<String>> getPdbChainIdClusters(String pdbId) { 102 loadClusters(sequenceIdentity); 103 String pdbIdUpper = pdbId.toUpperCase(); 104 105 List<List<String>> matches = new ArrayList<List<String>>(); 106 for (List<String> cluster: clusters) { 107 for (String chainId: cluster) { 108 if (chainId.startsWith(pdbIdUpper)) { 109 matches.add(cluster); 110 break; 111 } 112 } 113 } 114 return matches; 115 } 116 117 public List<List<String>> getChainIdsInEntry(String pdbId) { 118 loadClusters(sequenceIdentity); 119 120 List<List<String>> matches = new ArrayList<List<String>>(); 121 List<String> match = null; 122 123 for (List<String> cluster: clusters) { 124 for (String chainId: cluster) { 125 if (chainId.startsWith(pdbId)) { 126 if (match == null) { 127 match = new ArrayList<String>(); 128 } 129 match.add(chainId.substring(5)); 130 } 131 } 132 if (match != null) { 133 Collections.sort(match); 134 matches.add(match); 135 match = null; 136 } 137 } 138 return matches; 139 } 140 141 private void loadClusters(int sequenceIdentity) { 142 // load clusters only once 143 if (clusters.size() > 0) { 144 return; 145 } 146 147 if (!seqIdentities.contains(sequenceIdentity)) { 148 logger.error("Representative chains are not available for %sequence identity: {}", sequenceIdentity); 149 return; 150 } 151 152 String urlString = coreUrl + "bc-" + sequenceIdentity + ".out"; 153 154 try { 155 156 URL u = new URL(urlString); 157 InputStream stream = u.openStream(); 158 159 if (stream != null) { 160 BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); 161 162 String line = null; 163 while ((line = reader.readLine()) != null) { 164 line = line.replaceAll("_", "."); 165 List<String> cluster = Arrays.asList(line.split(" ")); 166 clusters.add(cluster); 167 } 168 reader.close(); 169 stream.close(); 170 } else { 171 throw new IOException("Got null stream for URL " + urlString); 172 } 173 } catch (IOException e) { 174 logger.error("Could not get sequence clusters from URL " + urlString + ". Error: " + e.getMessage()); 175 } 176 177 } 178 179} 180