001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.contact; 022 023import java.io.Serializable; 024import java.util.ArrayList; 025import java.util.Collections; 026import java.util.Comparator; 027import java.util.Iterator; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.TreeMap; 032 033import org.biojava.nbio.core.util.SingleLinkageClusterer; 034import org.biojava.nbio.structure.Atom; 035import org.biojava.nbio.structure.Structure; 036import org.biojava.nbio.structure.asa.AsaCalculator; 037import org.biojava.nbio.structure.xtal.CrystalBuilder; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041 042/** 043 * A list of interfaces between 2 molecules (2 sets of atoms) 044 * 045 * @author duarte_j 046 * 047 */ 048public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> { 049 050 private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class); 051 052 /** 053 * Default minimum area for a contact between two chains to be considered a 054 * valid interface. 055 * @see #removeInterfacesBelowArea(double); 056 */ 057 public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0; 058 /** 059 * Default number of points to use when calculating ASAs 060 * @see #calcAsas(int, int, int) 061 */ 062 public static final int DEFAULT_ASA_SPHERE_POINTS = 3000; 063 /** 064 * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used 065 * @see #calcAsas(int, int, int) 066 */ 067 public static final int DEFAULT_MIN_COFACTOR_SIZE = 40; 068 069 /** 070 * Any 2 interfaces with contact overlap score larger than this value 071 * will be considered to be clustered 072 */ 073 public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2; 074 075 private static final long serialVersionUID = 1L; 076 077 private List<StructureInterface> list; 078 079 private List<StructureInterfaceCluster> clusters; 080 081 082 public StructureInterfaceList() { 083 this.list = new ArrayList<StructureInterface>(); 084 } 085 086 public void add(StructureInterface interf) { 087 this.list.add(interf); 088 } 089 090 public int size() { 091 return this.list.size(); 092 } 093 094 /** 095 * Gets the interface corresponding to given id. 096 * The ids go from 1 to n 097 * If {@link #sort()} was called then the order is descendent by area. 098 * @param id 099 * @return 100 */ 101 public StructureInterface get(int id) { 102 return list.get(id-1); 103 } 104 105 /** 106 * Calculates ASAs for all interfaces in list, both for the unbound 107 * chains and for the complex of the two chains together. 108 * Also sorts the interfaces based on calculated BSA areas (descending). 109 * 110 * <p>Uses default parameters 111 */ 112 public void calcAsas() { 113 calcAsas( DEFAULT_ASA_SPHERE_POINTS, 114 Runtime.getRuntime().availableProcessors(), 115 DEFAULT_MIN_COFACTOR_SIZE ); 116 } 117 /** 118 * Calculates ASAs for all interfaces in list, both for the unbound 119 * chains and for the complex of the two chains together. 120 * Also sorts the interfaces based on calculated BSA areas (descending) 121 * @param nSpherePoints 122 * @param nThreads 123 * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used 124 */ 125 public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) { 126 127 // asa/bsa calculation 128 // NOTE in principle it is more efficient to calculate asas only once per unique chain 129 // BUT! the rolling ball algorithm gives slightly different values for same molecule in different 130 // rotations (due to sampling depending on orientation of axes grid). 131 // Both NACCESS and our own implementation behave like that. 132 // That's why we calculate ASAs for each rotation-unique molecule, otherwise 133 // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values 134 135 136 Map<String, Atom[]> uniqAsaChains = new TreeMap<String, Atom[]>(); 137 Map<String, double[]> chainAsas = new TreeMap<String, double[]>(); 138 139 // first we gather rotation-unique chains (in terms of AU id and transform id) 140 for (StructureInterface interf:list) { 141 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 142 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 143 144 uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse)); 145 uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse)); 146 } 147 148 long start = System.currentTimeMillis(); 149 150 // we only need to calculate ASA for that subset (any translation of those will have same values) 151 for (String molecId:uniqAsaChains.keySet()) { 152 153 AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId), 154 AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); 155 156 double[] atomAsas = asaCalc.calculateAsas(); 157 158 chainAsas.put(molecId, atomAsas); 159 160 } 161 long end = System.currentTimeMillis(); 162 163 logger.debug("Calculated uncomplexed ASA for "+uniqAsaChains.size()+" orientation-unique chains. " 164 + "Time: "+((end-start)/1000.0)+" s"); 165 166 start = System.currentTimeMillis(); 167 168 // now we calculate the ASAs for each of the complexes 169 for (StructureInterface interf:list) { 170 171 String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId(); 172 String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId(); 173 174 interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse); 175 176 } 177 end = System.currentTimeMillis(); 178 179 logger.debug("Calculated complexes ASA for "+list.size()+" pairwise complexes. " 180 + "Time: "+((end-start)/1000.0)+" s"); 181 182 183 // finally we sort based on the ChainInterface.comparable() (based in interfaceArea) 184 sort(); 185 } 186 187 /** 188 * Sorts the interface list and reassigns ids based on new sorting 189 */ 190 public void sort() { 191 Collections.sort(list); 192 int i=1; 193 for (StructureInterface interf:list) { 194 interf.setId(i); 195 i++; 196 } 197 } 198 199 /** 200 * Removes from this interface list all interfaces with areas 201 * below the default cutoff area 202 * @see #DEFAULT_MINIMUM_INTERFACE_AREA 203 */ 204 public void removeInterfacesBelowArea() { 205 removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA); 206 } 207 208 /** 209 * Removes from this interface list all interfaces with areas 210 * below the given cutoff area 211 * @param area 212 */ 213 public void removeInterfacesBelowArea(double area) { 214 Iterator<StructureInterface> it = iterator(); 215 while (it.hasNext()) { 216 StructureInterface interf = it.next(); 217 if (interf.getTotalArea()<area) { 218 it.remove(); 219 } 220 } 221 } 222 223 /** 224 * Calculate the interface clusters for this StructureInterfaceList 225 * using a contact overlap score to measure the similarity of interfaces. 226 * Subsequent calls will use the cached value without recomputing the clusters. 227 * The contact overlap score cutoff to consider a pair in the same cluster is 228 * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF} 229 * @return 230 */ 231 public List<StructureInterfaceCluster> getClusters() { 232 return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF); 233 } 234 235 /** 236 * Calculate the interface clusters for this StructureInterfaceList 237 * using a contact overlap score to measure the similarity of interfaces. 238 * Subsequent calls will use the cached value without recomputing the clusters. 239 * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be 240 * clustered 241 * @return 242 */ 243 public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) { 244 if (clusters!=null) { 245 return clusters; 246 } 247 248 clusters = new ArrayList<StructureInterfaceCluster>(); 249 250 // nothing to do if we have no interfaces 251 if (list.size()==0) return clusters; 252 253 double[][] matrix = new double[list.size()][list.size()]; 254 255 for (int i=0;i<list.size();i++) { 256 for (int j=i+1;j<list.size();j++) { 257 StructureInterface iInterf = list.get(i); 258 StructureInterface jInterf = list.get(j); 259 260 double scoreDirect = iInterf.getContactOverlapScore(jInterf, false); 261 double scoreInvert = iInterf.getContactOverlapScore(jInterf, true); 262 263 double maxScore = Math.max(scoreDirect, scoreInvert); 264 265 matrix[i][j] = maxScore; 266 } 267 268 } 269 270 SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true); 271 272 Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff); 273 for (int clusterIdx:clusteredIndices.keySet()) { 274 List<StructureInterface> members = new ArrayList<StructureInterface>(); 275 for (int idx:clusteredIndices.get(clusterIdx)) { 276 members.add(list.get(idx)); 277 } 278 StructureInterfaceCluster cluster = new StructureInterfaceCluster(); 279 cluster.setMembers(members); 280 double averageScore = 0.0; 281 int countPairs = 0; 282 for (int i=0;i<members.size();i++) { 283 for (int j=i+1;j<members.size();j++) { 284 averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1]; 285 countPairs++; 286 } 287 } 288 if (countPairs>0) { 289 averageScore = averageScore/countPairs; 290 } else { 291 // if only one interface in cluster we set the score to the maximum 292 averageScore = 1.0; 293 } 294 cluster.setAverageScore(averageScore); 295 clusters.add(cluster); 296 } 297 298 // finally we have to set the back-references in each StructureInterface 299 for (StructureInterfaceCluster cluster:clusters) { 300 for (StructureInterface interf:cluster.getMembers()) { 301 interf.setCluster(cluster); 302 } 303 } 304 305 // now we sort by areas (descending) and assign ids based on that sorting 306 Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() { 307 @Override 308 public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) { 309 return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending 310 } 311 }); 312 int id = 1; 313 for (StructureInterfaceCluster cluster:clusters) { 314 cluster.setId(id); 315 id++; 316 } 317 318 319 return clusters; 320 } 321 322 @Override 323 public Iterator<StructureInterface> iterator() { 324 return list.iterator(); 325 } 326 327 @Override 328 public String toString() { 329 return list.toString(); 330 } 331 332 /** 333 * Calculates the interfaces for a structure using default parameters 334 * @param struc 335 * @return 336 */ 337 public static StructureInterfaceList calculateInterfaces(Structure struc) { 338 CrystalBuilder builder = new CrystalBuilder(struc); 339 StructureInterfaceList interfaces = builder.getUniqueInterfaces(); 340 logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces"); 341 interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance 342 Runtime.getRuntime().availableProcessors(), 343 StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE); 344 interfaces.removeInterfacesBelowArea(); 345 interfaces.getClusters(); 346 logger.debug("Found "+interfaces.size()+" interfaces"); 347 return interfaces; 348 } 349 350}