001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.contact; 022 023import java.io.Serializable; 024import java.util.*; 025 026import org.biojava.nbio.core.util.SingleLinkageClusterer; 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.Structure; 029import org.biojava.nbio.structure.asa.AsaCalculator; 030import org.biojava.nbio.structure.xtal.CrystalBuilder; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034 035/** 036 * A list of interfaces between 2 molecules (2 sets of atoms) 037 * 038 * @author duarte_j 039 * 040 */ 041public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> { 042 043 private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class); 044 045 /** 046 * Default minimum area for a contact between two chains to be considered a 047 * valid interface. 048 * @see #removeInterfacesBelowArea(double); 049 */ 050 public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0; 051 /** 052 * Default number of points to use when calculating ASAs 053 * @see #calcAsas(int, int, int) 054 */ 055 public static final int DEFAULT_ASA_SPHERE_POINTS = 3000; 056 /** 057 * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used 058 * @see #calcAsas(int, int, int) 059 */ 060 public static final int DEFAULT_MIN_COFACTOR_SIZE = 40; 061 062 /** 063 * Any 2 interfaces with contact overlap score larger than this value 064 * will be considered to be clustered 065 */ 066 public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2; 067 068 private static final long serialVersionUID = 1L; 069 070 private List<StructureInterface> list; 071 072 private List<StructureInterfaceCluster> clusters = null; 073 private List<StructureInterfaceCluster> clustersNcs = null; 074 075 public StructureInterfaceList() { 076 this.list = new ArrayList<>(); 077 } 078 079 public void add(StructureInterface interf) { 080 this.list.add(interf); 081 } 082 083 public int size() { 084 return this.list.size(); 085 } 086 087 /** 088 * Gets the interface corresponding to given id. 089 * The ids go from 1 to n 090 * If {@link #sort()} was called then the order is descendent by area. 091 * @param id 092 * @return 093 */ 094 public StructureInterface get(int id) { 095 return list.get(id-1); 096 } 097 098 /** 099 * Calculates ASAs for all interfaces in list, both for the unbound 100 * chains and for the complex of the two chains together. 101 * Also sorts the interfaces based on calculated BSA areas (descending). 102 * 103 * <p>Uses default parameters 104 */ 105 public void calcAsas() { 106 calcAsas( DEFAULT_ASA_SPHERE_POINTS, 107 Runtime.getRuntime().availableProcessors(), 108 DEFAULT_MIN_COFACTOR_SIZE ); 109 } 110 /** 111 * Calculates ASAs for all interfaces in list, both for the unbound 112 * chains and for the complex of the two chains together. 113 * Also sorts the interfaces based on calculated BSA areas (descending) 114 * @param nSpherePoints 115 * @param nThreads 116 * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used 117 */ 118 public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) { 119 120 // asa/bsa calculation 121 // NOTE in principle it is more efficient to calculate asas only once per unique chain 122 // BUT! the rolling ball algorithm gives slightly different values for same molecule in different 123 // rotations (due to sampling depending on orientation of axes grid). 124 // Both NACCESS and our own implementation behave like that. 125 // That's why we calculate ASAs for each rotation-unique molecule, otherwise 126 // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values 127 128 129 Map<String, Atom[]> uniqAsaChains = new TreeMap<String, Atom[]>(); 130 Map<String, double[]> chainAsas = new TreeMap<String, double[]>(); 131 132 // first we gather rotation-unique chains (in terms of AU id and transform id) 133 for (StructureInterface interf:list) { 134 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 135 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 136 137 uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse)); 138 uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse)); 139 } 140 141 long start = System.currentTimeMillis(); 142 143 // we only need to calculate ASA for that subset (any translation of those will have same values) 144 for (String molecId:uniqAsaChains.keySet()) { 145 146 AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId), 147 AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); 148 149 double[] atomAsas = asaCalc.calculateAsas(); 150 151 chainAsas.put(molecId, atomAsas); 152 153 } 154 long end = System.currentTimeMillis(); 155 156 logger.debug("Calculated uncomplexed ASA for "+uniqAsaChains.size()+" orientation-unique chains. " 157 + "Time: "+((end-start)/1000.0)+" s"); 158 159 start = System.currentTimeMillis(); 160 161 // now we calculate the ASAs for each of the complexes 162 for (StructureInterface interf:list) { 163 164 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 165 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 166 167 interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse); 168 169 } 170 end = System.currentTimeMillis(); 171 172 logger.debug("Calculated complexes ASA for "+list.size()+" pairwise complexes. " 173 + "Time: "+((end-start)/1000.0)+" s"); 174 175 176 // finally we sort based on the ChainInterface.comparable() (based in interfaceArea) 177 sort(); 178 } 179 180 /** 181 * Sorts the interface list and reassigns ids based on new sorting 182 */ 183 public void sort() { 184 Collections.sort(list); 185 int i=1; 186 for (StructureInterface interf:list) { 187 interf.setId(i); 188 i++; 189 } 190 } 191 192 /** 193 * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence. 194 * This means that for any two interfaces in the same cluster: 195 * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order. 196 * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical. 197 * @return list of {@link StructureInterfaceCluster} objects. 198 * @since 5.0.0 199 */ 200 public List<StructureInterfaceCluster> getClustersNcs() { 201 return clustersNcs; 202 } 203 204 /** 205 * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list. 206 * Used to build up the NCS clustering. 207 * @param interfaceNew 208 * an interface to be added to the list. 209 * @param interfaceRef 210 * interfaceNew will be added to the cluster which contains interfaceRef. 211 * If interfaceRef is null, new cluster will be created for interfaceNew. 212 * @since 5.0.0 213 */ 214 public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) { 215 216 this.add(interfaceNew); 217 if (clustersNcs == null) { 218 clustersNcs = new ArrayList<>(); 219 } 220 221 if (interfaceRef == null) { 222 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 223 newCluster.addMember(interfaceNew); 224 clustersNcs.add(newCluster); 225 return; 226 } 227 228 Optional<StructureInterfaceCluster> clusterRef = 229 clustersNcs.stream(). 230 filter(r->r.getMembers().stream(). 231 anyMatch(c -> c.equals(interfaceRef))). 232 findFirst(); 233 234 if (clusterRef.isPresent()) { 235 clusterRef.get().addMember(interfaceNew); 236 return; 237 } 238 239 logger.warn("The specified reference interface, if not null, should have been added to this set previously. " + 240 "Creating new cluster and adding both interfaces. This is likely a bug."); 241 this.add(interfaceRef); 242 StructureInterfaceCluster newCluster = new StructureInterfaceCluster(); 243 newCluster.addMember(interfaceRef); 244 newCluster.addMember(interfaceNew); 245 clustersNcs.add(newCluster); 246 } 247 248 /** 249 * Removes from this interface list all interfaces with areas 250 * below the default cutoff area 251 * @see #DEFAULT_MINIMUM_INTERFACE_AREA 252 */ 253 public void removeInterfacesBelowArea() { 254 removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA); 255 } 256 257 /** 258 * Removes from this interface list all interfaces with areas 259 * below the given cutoff area 260 * @param area 261 */ 262 public void removeInterfacesBelowArea(double area) { 263 Iterator<StructureInterface> it = iterator(); 264 while (it.hasNext()) { 265 StructureInterface interf = it.next(); 266 if (interf.getTotalArea()<area) { 267 it.remove(); 268 } 269 } 270 } 271 272 /** 273 * Calculate the interface clusters for this StructureInterfaceList 274 * using a contact overlap score to measure the similarity of interfaces. 275 * Subsequent calls will use the cached value without recomputing the clusters. 276 * The contact overlap score cutoff to consider a pair in the same cluster is 277 * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF} 278 * @return 279 */ 280 public List<StructureInterfaceCluster> getClusters() { 281 return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF); 282 } 283 284 /** 285 * Calculate the interface clusters for this StructureInterfaceList 286 * using a contact overlap score to measure the similarity of interfaces. 287 * Subsequent calls will use the cached value without recomputing the clusters. 288 * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be 289 * clustered 290 * @return 291 */ 292 public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) { 293 if (clusters!=null) { 294 return clusters; 295 } 296 297 clusters = new ArrayList<StructureInterfaceCluster>(); 298 299 // nothing to do if we have no interfaces 300 if (list.size()==0) return clusters; 301 302 double[][] matrix = new double[list.size()][list.size()]; 303 304 for (int i=0;i<list.size();i++) { 305 for (int j=i+1;j<list.size();j++) { 306 StructureInterface iInterf = list.get(i); 307 StructureInterface jInterf = list.get(j); 308 309 double scoreDirect = iInterf.getContactOverlapScore(jInterf, false); 310 double scoreInvert = iInterf.getContactOverlapScore(jInterf, true); 311 312 double maxScore = Math.max(scoreDirect, scoreInvert); 313 314 matrix[i][j] = maxScore; 315 } 316 317 } 318 319 SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true); 320 321 Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff); 322 for (int clusterIdx:clusteredIndices.keySet()) { 323 List<StructureInterface> members = new ArrayList<StructureInterface>(); 324 for (int idx:clusteredIndices.get(clusterIdx)) { 325 members.add(list.get(idx)); 326 } 327 StructureInterfaceCluster cluster = new StructureInterfaceCluster(); 328 cluster.setMembers(members); 329 double averageScore = 0.0; 330 int countPairs = 0; 331 for (int i=0;i<members.size();i++) { 332 for (int j=i+1;j<members.size();j++) { 333 averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1]; 334 countPairs++; 335 } 336 } 337 if (countPairs>0) { 338 averageScore = averageScore/countPairs; 339 } else { 340 // if only one interface in cluster we set the score to the maximum 341 averageScore = 1.0; 342 } 343 cluster.setAverageScore(averageScore); 344 clusters.add(cluster); 345 } 346 347 // finally we have to set the back-references in each StructureInterface 348 for (StructureInterfaceCluster cluster:clusters) { 349 for (StructureInterface interf:cluster.getMembers()) { 350 interf.setCluster(cluster); 351 } 352 } 353 354 // now we sort by areas (descending) and assign ids based on that sorting 355 Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() { 356 @Override 357 public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) { 358 return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending 359 } 360 }); 361 int id = 1; 362 for (StructureInterfaceCluster cluster:clusters) { 363 cluster.setId(id); 364 id++; 365 } 366 367 368 return clusters; 369 } 370 371 @Override 372 public Iterator<StructureInterface> iterator() { 373 return list.iterator(); 374 } 375 376 @Override 377 public String toString() { 378 return list.toString(); 379 } 380 381 /** 382 * Calculates the interfaces for a structure using default parameters 383 * @param struc 384 * @return 385 */ 386 public static StructureInterfaceList calculateInterfaces(Structure struc) { 387 CrystalBuilder builder = new CrystalBuilder(struc); 388 StructureInterfaceList interfaces = builder.getUniqueInterfaces(); 389 logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces"); 390 interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance 391 Runtime.getRuntime().availableProcessors(), 392 StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE); 393 interfaces.removeInterfacesBelowArea(); 394 interfaces.getClusters(); 395 logger.debug("Found "+interfaces.size()+" interfaces"); 396 return interfaces; 397 } 398 399}