001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.contact; 022 023import java.io.Serializable; 024import java.util.ArrayList; 025import java.util.Collections; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.Optional; 030import java.util.Set; 031import java.util.TreeMap; 032 033import org.biojava.nbio.core.util.SingleLinkageClusterer; 034import org.biojava.nbio.structure.Atom; 035import org.biojava.nbio.structure.Structure; 036import org.biojava.nbio.structure.asa.AsaCalculator; 037import org.biojava.nbio.structure.xtal.CrystalBuilder; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041 042/** 043 * A list of interfaces between 2 molecules (2 sets of atoms) 044 * 045 * @author Jose Duarte 046 * 047 */ 048public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> { 049 050 private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class); 051 052 /** 053 * Default minimum area for a contact between two chains to be considered a 054 * valid interface. 055 * @see #removeInterfacesBelowArea(double); 056 */ 057 public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0; 058 /** 059 * Default number of points to use when calculating ASAs 060 * @see #calcAsas(int, int, int) 061 */ 062 public static final int DEFAULT_ASA_SPHERE_POINTS = 3000; 063 /** 064 * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used 065 * @see #calcAsas(int, int, int) 066 */ 067 public static final int DEFAULT_MIN_COFACTOR_SIZE = 40; 068 069 /** 070 * Any 2 interfaces with contact overlap score larger than this value 071 * will be considered to be clustered 072 */ 073 public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2; 074 075 private static final long serialVersionUID = 1L; 076 077 private List<StructureInterface> list; 078 079 private List<StructureInterfaceCluster> clusters = null; 080 private List<StructureInterfaceCluster> clustersNcs = null; 081 082 private Map<String, String> chainOrigNamesMap; 083 084 public StructureInterfaceList() { 085 this.list = new ArrayList<>(); 086 } 087 088 public void add(StructureInterface interf) { 089 this.list.add(interf); 090 } 091 092 public int size() { 093 return this.list.size(); 094 } 095 096 /** 097 * Gets the interface corresponding to given id. 098 * The ids go from 1 to n 099 * If {@link #sort()} was called then the order is descendent by area. 100 * @param id 101 * @return 102 */ 103 public StructureInterface get(int id) { 104 return list.get(id-1); 105 } 106 107 /** 108 * Calculates ASAs for all interfaces in list, both for the unbound 109 * chains and for the complex of the two chains together. 110 * Also sorts the interfaces based on calculated BSA areas (descending). 111 * 112 * <p>Uses default parameters 113 */ 114 public void calcAsas() { 115 calcAsas( DEFAULT_ASA_SPHERE_POINTS, 116 Runtime.getRuntime().availableProcessors(), 117 DEFAULT_MIN_COFACTOR_SIZE ); 118 } 119 /** 120 * Calculates ASAs for all interfaces in list, both for the unbound 121 * chains and for the complex of the two chains together. 122 * Also sorts the interfaces based on calculated BSA areas (descending) 123 * @param nSpherePoints 124 * @param nThreads 125 * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used 126 */ 127 public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) { 128 129 // asa/bsa calculation 130 // NOTE in principle it is more efficient to calculate asas only once per unique chain 131 // BUT! the rolling ball algorithm gives slightly different values for same molecule in different 132 // rotations (due to sampling depending on orientation of axes grid). 133 // Both NACCESS and our own implementation behave like that. 134 // That's why we calculate ASAs for each rotation-unique molecule, otherwise 135 // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values 136 137 138 Map<String, Atom[]> uniqAsaChains = new TreeMap<>(); 139 Map<String, double[]> chainAsas = new TreeMap<>(); 140 141 List<StructureInterface> redundancyReducedList; 142 if (clustersNcs != null) { 143 redundancyReducedList = new ArrayList<>(); 144 for (StructureInterfaceCluster ncsCluster : clustersNcs) { 145 // we use the first one in list as the only one for which we calculate ASAs 146 redundancyReducedList.add(ncsCluster.getMembers().get(0)); 147 } 148 149 } else { 150 redundancyReducedList = list; 151 } 152 153 // first we gather rotation-unique chains (in terms of AU id and transform id) 154 for (StructureInterface interf:redundancyReducedList) { 155 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 156 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 157 158 uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse)); 159 uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse)); 160 } 161 162 logger.debug("Will calculate uncomplexed ASA for {} orientation-unique chains.", uniqAsaChains.size()); 163 164 long start = System.currentTimeMillis(); 165 166 // we only need to calculate ASA for that subset (any translation of those will have same values) 167 for (String molecId:uniqAsaChains.keySet()) { 168 169 logger.debug("Calculating uncomplexed ASA for molecId {}, with {} atoms", molecId, uniqAsaChains.get(molecId).length); 170 171 AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId), 172 AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); 173 174 double[] atomAsas = asaCalc.calculateAsas(); 175 176 chainAsas.put(molecId, atomAsas); 177 178 } 179 long end = System.currentTimeMillis(); 180 181 logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0)); 182 183 logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", redundancyReducedList.size()); 184 185 start = System.currentTimeMillis(); 186 187 // now we calculate the ASAs for each of the complexes 188 for (StructureInterface interf:redundancyReducedList) { 189 190 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 191 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 192 193 logger.debug("Calculating complexed ASAs for interface {} between molecules {} and {}", interf.getId(), molecId1, molecId2); 194 195 interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse); 196 197 } 198 end = System.currentTimeMillis(); 199 200 logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", redundancyReducedList.size(), ((end-start)/1000.0)); 201 202 // now let's populate the interface area value for the NCS-redundant ones from the reference interface (first one in list) 203 if (clustersNcs!=null) { 204 if (chainOrigNamesMap==null) { 205 logger.warn("No chainOrigNamesMap is set. Considering NCS interfaces in same order as reference. This is likely a bug."); 206 } 207 for (StructureInterfaceCluster ncsCluster : clustersNcs) { 208 StructureInterface refInterf = ncsCluster.getMembers().get(0); 209 String refMolecId1 = refInterf.getMoleculeIds().getFirst(); 210 for (int i=1;i<ncsCluster.getMembers().size();i++) { 211 StructureInterface member = ncsCluster.getMembers().get(i); 212 member.setTotalArea(refInterf.getTotalArea()); 213 String molecId1 = member.getMoleculeIds().getFirst(); 214 if (areMolecIdsSameOrder(refMolecId1, molecId1)) { 215 // we add the reference interface GroupAsas as the GroupAsas for all other members, like that 216 // ResidueNumbers won't match in their chain ids, but otherwise all info is there without using a lot of memory 217 member.setFirstGroupAsas(refInterf.getFirstGroupAsas()); 218 member.setSecondGroupAsas(refInterf.getSecondGroupAsas()); 219 } else { 220 member.setFirstGroupAsas(refInterf.getSecondGroupAsas()); 221 member.setSecondGroupAsas(refInterf.getFirstGroupAsas()); 222 } 223 } 224 } 225 } 226 227 // finally we sort based on the ChainInterface.comparable() (based in interfaceArea) 228 sort(); 229 } 230 231 private boolean areMolecIdsSameOrder(String refMolecId, String molecId) { 232 233 if (chainOrigNamesMap==null) { 234 // we've already warned above 235 return true; 236 } 237 238 String refMolecIdOrig = chainOrigNamesMap.get(refMolecId); 239 String molecIdOrig = chainOrigNamesMap.get(molecId); 240 241 return (refMolecIdOrig.equals(molecIdOrig)); 242 } 243 244 /** 245 * Sorts the interface list and reassigns ids based on new sorting 246 * 247 */ 248 public void sort() { 249 Collections.sort(list); 250 int i=1; 251 for (StructureInterface interf:list) { 252 interf.setId(i); 253 i++; 254 } 255 } 256 257 /** 258 * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence. 259 * This means that for any two interfaces in the same cluster: 260 * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order. 261 * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical. 262 * @return list of {@link StructureInterfaceCluster} objects. 263 * @since 5.0.0 264 */ 265 public List<StructureInterfaceCluster> getClustersNcs() { 266 return clustersNcs; 267 } 268 269 /** 270 * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list. 271 * Used to build up the NCS clustering. 272 * @param interfaceNew 273 * an interface to be added to the list. 274 * @param interfaceRef 275 * interfaceNew will be added to the cluster which contains interfaceRef. 276 * If interfaceRef is null, new cluster will be created for interfaceNew. 277 * @since 5.0.0 278 */ 279 public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) { 280 281 this.add(interfaceNew); 282 if (clustersNcs == null) { 283 clustersNcs = new ArrayList<>(); 284 } 285 286 if (interfaceRef == null) { 287 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 288 newCluster.addMember(interfaceNew); 289 clustersNcs.add(newCluster); 290 return; 291 } 292 293 Optional<StructureInterfaceCluster> clusterRef = 294 clustersNcs.stream(). 295 filter(r->r.getMembers().stream(). 296 anyMatch(c -> c.equals(interfaceRef))). 297 findFirst(); 298 299 if (clusterRef.isPresent()) { 300 clusterRef.get().addMember(interfaceNew); 301 return; 302 } 303 304 logger.warn("The specified reference interface, if not null, should have been added to this set previously. " + 305 "Creating new cluster and adding both interfaces. This is likely a bug."); 306 this.add(interfaceRef); 307 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 308 newCluster.addMember(interfaceRef); 309 newCluster.addMember(interfaceNew); 310 clustersNcs.add(newCluster); 311 } 312 313 /** 314 * Sets a map with mapping from NCS chain names to original chain names. 315 * Necessary when {@link #addNcsEquivalent(StructureInterface, StructureInterface)} is used and NCS equivalent 316 * interfaces exist in this list and their names need mapping when setting ASAs. 317 * @param chainOrigNamesMap a map of NCS chain name to original chain name 318 */ 319 public void setChainOrigNamesMap(Map<String, String> chainOrigNamesMap) { 320 this.chainOrigNamesMap = chainOrigNamesMap; 321 } 322 323 /** 324 * Removes from this interface list all interfaces with areas 325 * below the default cutoff area. 326 * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would 327 * be 0 and thus all removed. 328 * @see #DEFAULT_MINIMUM_INTERFACE_AREA 329 */ 330 public void removeInterfacesBelowArea() { 331 removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA); 332 } 333 334 /** 335 * Removes from this interface list all interfaces with areas 336 * below the given cutoff area. 337 * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would 338 * be 0 and thus all removed. 339 * @param area the minimum interface buried surface area to keep. Interfaces below this value will be removed. 340 */ 341 public void removeInterfacesBelowArea(double area) { 342 343 list.removeIf(interf -> interf.getTotalArea() < area); 344 345 if (clustersNcs != null) { 346 clustersNcs.removeIf(ncsCluster -> ncsCluster.getMembers().get(0).getTotalArea() < area); 347 } 348 } 349 350 /** 351 * Calculate the interface clusters for this StructureInterfaceList 352 * using a contact overlap score to measure the similarity of interfaces. 353 * Subsequent calls will use the cached value without recomputing the clusters. 354 * The contact overlap score cutoff to consider a pair in the same cluster is 355 * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF} 356 * @return 357 */ 358 public List<StructureInterfaceCluster> getClusters() { 359 return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF); 360 } 361 362 /** 363 * Calculate the interface clusters for this StructureInterfaceList 364 * using a contact overlap score to measure the similarity of interfaces. 365 * Subsequent calls will use the cached value without recomputing the clusters. 366 * The clusters will be assigned ids by sorting descending by {@link StructureInterfaceCluster#getTotalArea()} 367 * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be 368 * clustered 369 * @return 370 */ 371 public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) { 372 if (clusters!=null) { 373 return clusters; 374 } 375 376 clusters = new ArrayList<>(); 377 378 // nothing to do if we have no interfaces 379 if (list.size()==0) return clusters; 380 381 double[][] matrix = new double[list.size()][list.size()]; 382 383 for (int i=0;i<list.size();i++) { 384 for (int j=i+1;j<list.size();j++) { 385 StructureInterface iInterf = list.get(i); 386 StructureInterface jInterf = list.get(j); 387 388 double scoreDirect = iInterf.getContactOverlapScore(jInterf, false); 389 double scoreInvert = iInterf.getContactOverlapScore(jInterf, true); 390 391 double maxScore = Math.max(scoreDirect, scoreInvert); 392 393 matrix[i][j] = maxScore; 394 } 395 396 } 397 398 SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true); 399 400 Map<Integer, Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff); 401 for (int clusterIdx:clusteredIndices.keySet()) { 402 List<StructureInterface> members = new ArrayList<>(); 403 for (int idx:clusteredIndices.get(clusterIdx)) { 404 members.add(list.get(idx)); 405 } 406 StructureInterfaceCluster cluster = new StructureInterfaceCluster(); 407 cluster.setMembers(members); 408 double averageScore = 0.0; 409 int countPairs = 0; 410 for (int i=0;i<members.size();i++) { 411 int iIdx = list.indexOf(members.get(i)); 412 for (int j=i+1;j<members.size();j++) { 413 averageScore += matrix[iIdx][list.indexOf(members.get(j))]; 414 countPairs++; 415 } 416 } 417 if (countPairs>0) { 418 averageScore = averageScore/countPairs; 419 } else { 420 // if only one interface in cluster we set the score to the maximum 421 averageScore = 1.0; 422 } 423 cluster.setAverageScore(averageScore); 424 clusters.add(cluster); 425 } 426 427 // finally we have to set the back-references in each StructureInterface 428 for (StructureInterfaceCluster cluster:clusters) { 429 for (StructureInterface interf:cluster.getMembers()) { 430 interf.setCluster(cluster); 431 } 432 } 433 434 // now we sort by areas (descending) and assign ids based on that sorting 435 clusters.sort((o1, o2) -> { 436 return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending 437 }); 438 int id = 1; 439 for (StructureInterfaceCluster cluster:clusters) { 440 cluster.setId(id); 441 id++; 442 } 443 444 return clusters; 445 } 446 447 @Override 448 public Iterator<StructureInterface> iterator() { 449 return list.iterator(); 450 } 451 452 @Override 453 public String toString() { 454 return list.toString(); 455 } 456 457 /** 458 * Calculates the interfaces for a structure using default parameters 459 * @param struc 460 * @return 461 */ 462 public static StructureInterfaceList calculateInterfaces(Structure struc) { 463 CrystalBuilder builder = new CrystalBuilder(struc); 464 StructureInterfaceList interfaces = builder.getUniqueInterfaces(); 465 logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces"); 466 interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance 467 Runtime.getRuntime().availableProcessors(), 468 StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE); 469 interfaces.removeInterfacesBelowArea(); 470 interfaces.getClusters(); 471 logger.debug("Found "+interfaces.size()+" interfaces"); 472 return interfaces; 473 } 474 475}