001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.contact; 022 023import java.io.Serializable; 024import java.util.*; 025 026import org.biojava.nbio.core.util.SingleLinkageClusterer; 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.Structure; 029import org.biojava.nbio.structure.asa.AsaCalculator; 030import org.biojava.nbio.structure.xtal.CrystalBuilder; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034 035/** 036 * A list of interfaces between 2 molecules (2 sets of atoms) 037 * 038 * @author Jose Duarte 039 * 040 */ 041public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> { 042 043 private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class); 044 045 /** 046 * Default minimum area for a contact between two chains to be considered a 047 * valid interface. 048 * @see #removeInterfacesBelowArea(double); 049 */ 050 public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0; 051 /** 052 * Default number of points to use when calculating ASAs 053 * @see #calcAsas(int, int, int) 054 */ 055 public static final int DEFAULT_ASA_SPHERE_POINTS = 3000; 056 /** 057 * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used 058 * @see #calcAsas(int, int, int) 059 */ 060 public static final int DEFAULT_MIN_COFACTOR_SIZE = 40; 061 062 /** 063 * Any 2 interfaces with contact overlap score larger than this value 064 * will be considered to be clustered 065 */ 066 public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2; 067 068 private static final long serialVersionUID = 1L; 069 070 private List<StructureInterface> list; 071 072 private List<StructureInterfaceCluster> clusters = null; 073 private List<StructureInterfaceCluster> clustersNcs = null; 074 075 public StructureInterfaceList() { 076 this.list = new ArrayList<>(); 077 } 078 079 public void add(StructureInterface interf) { 080 this.list.add(interf); 081 } 082 083 public int size() { 084 return this.list.size(); 085 } 086 087 /** 088 * Gets the interface corresponding to given id. 089 * The ids go from 1 to n 090 * If {@link #sort()} was called then the order is descendent by area. 091 * @param id 092 * @return 093 */ 094 public StructureInterface get(int id) { 095 return list.get(id-1); 096 } 097 098 /** 099 * Calculates ASAs for all interfaces in list, both for the unbound 100 * chains and for the complex of the two chains together. 101 * Also sorts the interfaces based on calculated BSA areas (descending). 102 * 103 * <p>Uses default parameters 104 */ 105 public void calcAsas() { 106 calcAsas( DEFAULT_ASA_SPHERE_POINTS, 107 Runtime.getRuntime().availableProcessors(), 108 DEFAULT_MIN_COFACTOR_SIZE ); 109 } 110 /** 111 * Calculates ASAs for all interfaces in list, both for the unbound 112 * chains and for the complex of the two chains together. 113 * Also sorts the interfaces based on calculated BSA areas (descending) 114 * @param nSpherePoints 115 * @param nThreads 116 * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used 117 */ 118 public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) { 119 120 // asa/bsa calculation 121 // NOTE in principle it is more efficient to calculate asas only once per unique chain 122 // BUT! the rolling ball algorithm gives slightly different values for same molecule in different 123 // rotations (due to sampling depending on orientation of axes grid). 124 // Both NACCESS and our own implementation behave like that. 125 // That's why we calculate ASAs for each rotation-unique molecule, otherwise 126 // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values 127 128 129 Map<String, Atom[]> uniqAsaChains = new TreeMap<>(); 130 Map<String, double[]> chainAsas = new TreeMap<>(); 131 132 // first we gather rotation-unique chains (in terms of AU id and transform id) 133 for (StructureInterface interf:list) { 134 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 135 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 136 137 uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse)); 138 uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse)); 139 } 140 141 logger.debug("Will calculate uncomplexed ASA for {} orientation-unique chains.", uniqAsaChains.size()); 142 143 long start = System.currentTimeMillis(); 144 145 // we only need to calculate ASA for that subset (any translation of those will have same values) 146 for (String molecId:uniqAsaChains.keySet()) { 147 148 logger.debug("Calculating uncomplexed ASA for molecId {}, with {} atoms", molecId, uniqAsaChains.get(molecId).length); 149 150 AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId), 151 AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); 152 153 double[] atomAsas = asaCalc.calculateAsas(); 154 155 chainAsas.put(molecId, atomAsas); 156 157 } 158 long end = System.currentTimeMillis(); 159 160 logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0)); 161 162 logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", list.size()); 163 164 start = System.currentTimeMillis(); 165 166 // now we calculate the ASAs for each of the complexes 167 for (StructureInterface interf:list) { 168 169 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 170 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 171 172 logger.debug("Calculating complexed ASAs for interface {} between molecules {} and {}", interf.getId(), molecId1, molecId2); 173 174 interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse); 175 176 } 177 end = System.currentTimeMillis(); 178 179 logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", list.size(), ((end-start)/1000.0)); 180 181 182 // finally we sort based on the ChainInterface.comparable() (based in interfaceArea) 183 sort(); 184 } 185 186 /** 187 * Sorts the interface list and reassigns ids based on new sorting 188 */ 189 public void sort() { 190 Collections.sort(list); 191 int i=1; 192 for (StructureInterface interf:list) { 193 interf.setId(i); 194 i++; 195 } 196 } 197 198 /** 199 * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence. 200 * This means that for any two interfaces in the same cluster: 201 * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order. 202 * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical. 203 * @return list of {@link StructureInterfaceCluster} objects. 204 * @since 5.0.0 205 */ 206 public List<StructureInterfaceCluster> getClustersNcs() { 207 return clustersNcs; 208 } 209 210 /** 211 * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list. 212 * Used to build up the NCS clustering. 213 * @param interfaceNew 214 * an interface to be added to the list. 215 * @param interfaceRef 216 * interfaceNew will be added to the cluster which contains interfaceRef. 217 * If interfaceRef is null, new cluster will be created for interfaceNew. 218 * @since 5.0.0 219 */ 220 public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) { 221 222 this.add(interfaceNew); 223 if (clustersNcs == null) { 224 clustersNcs = new ArrayList<>(); 225 } 226 227 if (interfaceRef == null) { 228 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 229 newCluster.addMember(interfaceNew); 230 clustersNcs.add(newCluster); 231 return; 232 } 233 234 Optional<StructureInterfaceCluster> clusterRef = 235 clustersNcs.stream(). 236 filter(r->r.getMembers().stream(). 237 anyMatch(c -> c.equals(interfaceRef))). 238 findFirst(); 239 240 if (clusterRef.isPresent()) { 241 clusterRef.get().addMember(interfaceNew); 242 return; 243 } 244 245 logger.warn("The specified reference interface, if not null, should have been added to this set previously. " + 246 "Creating new cluster and adding both interfaces. This is likely a bug."); 247 this.add(interfaceRef); 248 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 249 newCluster.addMember(interfaceRef); 250 newCluster.addMember(interfaceNew); 251 clustersNcs.add(newCluster); 252 } 253 254 /** 255 * Removes from this interface list all interfaces with areas 256 * below the default cutoff area 257 * @see #DEFAULT_MINIMUM_INTERFACE_AREA 258 */ 259 public void removeInterfacesBelowArea() { 260 removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA); 261 } 262 263 /** 264 * Removes from this interface list all interfaces with areas 265 * below the given cutoff area 266 * @param area 267 */ 268 public void removeInterfacesBelowArea(double area) { 269 Iterator<StructureInterface> it = iterator(); 270 while (it.hasNext()) { 271 StructureInterface interf = it.next(); 272 if (interf.getTotalArea()<area) { 273 it.remove(); 274 } 275 } 276 } 277 278 /** 279 * Calculate the interface clusters for this StructureInterfaceList 280 * using a contact overlap score to measure the similarity of interfaces. 281 * Subsequent calls will use the cached value without recomputing the clusters. 282 * The contact overlap score cutoff to consider a pair in the same cluster is 283 * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF} 284 * @return 285 */ 286 public List<StructureInterfaceCluster> getClusters() { 287 return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF); 288 } 289 290 /** 291 * Calculate the interface clusters for this StructureInterfaceList 292 * using a contact overlap score to measure the similarity of interfaces. 293 * Subsequent calls will use the cached value without recomputing the clusters. 294 * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be 295 * clustered 296 * @return 297 */ 298 public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) { 299 if (clusters!=null) { 300 return clusters; 301 } 302 303 clusters = new ArrayList<StructureInterfaceCluster>(); 304 305 // nothing to do if we have no interfaces 306 if (list.size()==0) return clusters; 307 308 double[][] matrix = new double[list.size()][list.size()]; 309 310 for (int i=0;i<list.size();i++) { 311 for (int j=i+1;j<list.size();j++) { 312 StructureInterface iInterf = list.get(i); 313 StructureInterface jInterf = list.get(j); 314 315 double scoreDirect = iInterf.getContactOverlapScore(jInterf, false); 316 double scoreInvert = iInterf.getContactOverlapScore(jInterf, true); 317 318 double maxScore = Math.max(scoreDirect, scoreInvert); 319 320 matrix[i][j] = maxScore; 321 } 322 323 } 324 325 SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true); 326 327 Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff); 328 for (int clusterIdx:clusteredIndices.keySet()) { 329 List<StructureInterface> members = new ArrayList<StructureInterface>(); 330 for (int idx:clusteredIndices.get(clusterIdx)) { 331 members.add(list.get(idx)); 332 } 333 StructureInterfaceCluster cluster = new StructureInterfaceCluster(); 334 cluster.setMembers(members); 335 double averageScore = 0.0; 336 int countPairs = 0; 337 for (int i=0;i<members.size();i++) { 338 for (int j=i+1;j<members.size();j++) { 339 averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1]; 340 countPairs++; 341 } 342 } 343 if (countPairs>0) { 344 averageScore = averageScore/countPairs; 345 } else { 346 // if only one interface in cluster we set the score to the maximum 347 averageScore = 1.0; 348 } 349 cluster.setAverageScore(averageScore); 350 clusters.add(cluster); 351 } 352 353 // finally we have to set the back-references in each StructureInterface 354 for (StructureInterfaceCluster cluster:clusters) { 355 for (StructureInterface interf:cluster.getMembers()) { 356 interf.setCluster(cluster); 357 } 358 } 359 360 // now we sort by areas (descending) and assign ids based on that sorting 361 Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() { 362 @Override 363 public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) { 364 return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending 365 } 366 }); 367 int id = 1; 368 for (StructureInterfaceCluster cluster:clusters) { 369 cluster.setId(id); 370 id++; 371 } 372 373 374 return clusters; 375 } 376 377 @Override 378 public Iterator<StructureInterface> iterator() { 379 return list.iterator(); 380 } 381 382 @Override 383 public String toString() { 384 return list.toString(); 385 } 386 387 /** 388 * Calculates the interfaces for a structure using default parameters 389 * @param struc 390 * @return 391 */ 392 public static StructureInterfaceList calculateInterfaces(Structure struc) { 393 CrystalBuilder builder = new CrystalBuilder(struc); 394 StructureInterfaceList interfaces = builder.getUniqueInterfaces(); 395 logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces"); 396 interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance 397 Runtime.getRuntime().availableProcessors(), 398 StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE); 399 interfaces.removeInterfacesBelowArea(); 400 interfaces.getClusters(); 401 logger.debug("Found "+interfaces.size()+" interfaces"); 402 return interfaces; 403 } 404 405}