001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.contact;
022
023import java.io.Serializable;
024import java.util.ArrayList;
025import java.util.Collections;
026import java.util.Comparator;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import java.util.TreeMap;
032
033import org.biojava.nbio.core.util.SingleLinkageClusterer;
034import org.biojava.nbio.structure.Atom;
035import org.biojava.nbio.structure.Structure;
036import org.biojava.nbio.structure.asa.AsaCalculator;
037import org.biojava.nbio.structure.xtal.CrystalBuilder;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041
042/**
043 * A list of interfaces between 2 molecules (2 sets of atoms)
044 *
045 * @author duarte_j
046 *
047 */
048public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> {
049
050        private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class);
051
052        /**
053         * Default minimum area for a contact between two chains to be considered a
054         * valid interface.
055         * @see #removeInterfacesBelowArea(double);
056         */
057        public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0;
058        /**
059         * Default number of points to use when calculating ASAs
060         * @see #calcAsas(int, int, int)
061         */
062        public static final int DEFAULT_ASA_SPHERE_POINTS = 3000;
063        /**
064         * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used
065         * @see #calcAsas(int, int, int)
066         */
067        public static final int DEFAULT_MIN_COFACTOR_SIZE = 40;
068
069        /**
070         * Any 2 interfaces with contact overlap score larger than this value
071         * will be considered to be clustered
072         */
073        public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2;
074
075        private static final long serialVersionUID = 1L;
076
077        private List<StructureInterface> list;
078
079        private List<StructureInterfaceCluster> clusters;
080
081
082        public StructureInterfaceList() {
083                this.list = new ArrayList<StructureInterface>();
084        }
085
086        public void add(StructureInterface interf) {
087                this.list.add(interf);
088        }
089
090        public int size() {
091                return this.list.size();
092        }
093
094        /**
095         * Gets the interface corresponding to given id.
096         * The ids go from 1 to n
097         * If {@link #sort()} was called then the order is descendent by area.
098         * @param id
099         * @return
100         */
101        public StructureInterface get(int id) {
102                return list.get(id-1);
103        }
104
105        /**
106         * Calculates ASAs for all interfaces in list, both for the unbound
107         * chains and for the complex of the two chains together.
108         * Also sorts the interfaces based on calculated BSA areas (descending).
109         *
110         * <p>Uses default parameters
111         */
112        public void calcAsas() {
113                calcAsas( DEFAULT_ASA_SPHERE_POINTS,
114                                Runtime.getRuntime().availableProcessors(),
115                                DEFAULT_MIN_COFACTOR_SIZE );
116        }
117        /**
118         * Calculates ASAs for all interfaces in list, both for the unbound
119         * chains and for the complex of the two chains together.
120         * Also sorts the interfaces based on calculated BSA areas (descending)
121         * @param nSpherePoints
122         * @param nThreads
123         * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used
124         */
125        public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
126
127                // asa/bsa calculation
128                // NOTE in principle it is more efficient to calculate asas only once per unique chain
129                // BUT! the rolling ball algorithm gives slightly different values for same molecule in different
130                // rotations (due to sampling depending on orientation of axes grid).
131                // Both NACCESS and our own implementation behave like that.
132                // That's why we calculate ASAs for each rotation-unique molecule, otherwise
133                // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values
134
135
136                Map<String, Atom[]> uniqAsaChains = new TreeMap<String, Atom[]>();
137                Map<String, double[]> chainAsas = new TreeMap<String, double[]>();
138
139                // first we gather rotation-unique chains (in terms of AU id and transform id)
140                for (StructureInterface interf:list) {
141                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
142                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
143
144                        uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse));
145                        uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse));
146                }
147
148                long start = System.currentTimeMillis();
149
150                // we only need to calculate ASA for that subset (any translation of those will have same values)
151                for (String molecId:uniqAsaChains.keySet()) {
152
153                        AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId),
154                                        AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
155
156                        double[] atomAsas = asaCalc.calculateAsas();
157
158                        chainAsas.put(molecId, atomAsas);
159
160                }
161                long end = System.currentTimeMillis();
162
163                logger.debug("Calculated uncomplexed ASA for "+uniqAsaChains.size()+" orientation-unique chains. "
164                                        + "Time: "+((end-start)/1000.0)+" s");
165
166                start = System.currentTimeMillis();
167
168                // now we calculate the ASAs for each of the complexes
169                for (StructureInterface interf:list) {
170
171                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
172                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
173
174                        interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse);
175
176                }
177                end = System.currentTimeMillis();
178
179                logger.debug("Calculated complexes ASA for "+list.size()+" pairwise complexes. "
180                                        + "Time: "+((end-start)/1000.0)+" s");
181
182
183                // finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
184                sort();
185        }
186
187        /**
188         * Sorts the interface list and reassigns ids based on new sorting
189         */
190        public void sort() {
191                Collections.sort(list);
192                int i=1;
193                for (StructureInterface interf:list) {
194                        interf.setId(i);
195                        i++;
196                }
197        }
198
199        /**
200         * Removes from this interface list all interfaces with areas
201         * below the default cutoff area
202         * @see #DEFAULT_MINIMUM_INTERFACE_AREA
203         */
204        public void removeInterfacesBelowArea() {
205                removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA);
206        }
207
208        /**
209         * Removes from this interface list all interfaces with areas
210         * below the given cutoff area
211         * @param area
212         */
213        public void removeInterfacesBelowArea(double area) {
214                Iterator<StructureInterface> it = iterator();
215                while (it.hasNext()) {
216                        StructureInterface interf = it.next();
217                        if (interf.getTotalArea()<area) {
218                                it.remove();
219                        }
220                }
221        }
222
223        /**
224         * Calculate the interface clusters for this StructureInterfaceList
225         * using a contact overlap score to measure the similarity of interfaces.
226         * Subsequent calls will use the cached value without recomputing the clusters.
227         * The contact overlap score cutoff to consider a pair in the same cluster is
228         * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF}
229         * @return
230         */
231        public List<StructureInterfaceCluster> getClusters() {
232                return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF);
233        }
234
235        /**
236         * Calculate the interface clusters for this StructureInterfaceList
237         * using a contact overlap score to measure the similarity of interfaces.
238         * Subsequent calls will use the cached value without recomputing the clusters.
239         * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
240         * clustered
241         * @return
242         */
243        public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) {
244                if (clusters!=null) {
245                        return clusters;
246                }
247
248                clusters = new ArrayList<StructureInterfaceCluster>();
249
250                // nothing to do if we have no interfaces
251                if (list.size()==0) return clusters;
252
253                double[][] matrix = new double[list.size()][list.size()];
254
255                for (int i=0;i<list.size();i++) {
256                        for (int j=i+1;j<list.size();j++) {
257                                StructureInterface iInterf = list.get(i);
258                                StructureInterface jInterf = list.get(j);
259
260                                double scoreDirect = iInterf.getContactOverlapScore(jInterf, false);
261                                double scoreInvert = iInterf.getContactOverlapScore(jInterf, true);
262
263                                double maxScore = Math.max(scoreDirect, scoreInvert);
264
265                                matrix[i][j] = maxScore;
266                        }
267
268                }
269
270                SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
271
272                Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
273                for (int clusterIdx:clusteredIndices.keySet()) {
274                        List<StructureInterface> members = new ArrayList<StructureInterface>();
275                        for (int idx:clusteredIndices.get(clusterIdx)) {
276                                members.add(list.get(idx));
277                        }
278                        StructureInterfaceCluster cluster = new StructureInterfaceCluster();
279                        cluster.setMembers(members);
280                        double averageScore = 0.0;
281                        int countPairs = 0;
282                        for (int i=0;i<members.size();i++) {
283                                for (int j=i+1;j<members.size();j++) {
284                                        averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1];
285                                        countPairs++;
286                                }
287                        }
288                        if (countPairs>0) {
289                                averageScore = averageScore/countPairs;
290                        } else {
291                                // if only one interface in cluster we set the score to the maximum
292                                averageScore = 1.0;
293                        }
294                        cluster.setAverageScore(averageScore);
295                        clusters.add(cluster);
296                }
297
298                // finally we have to set the back-references in each StructureInterface
299                for (StructureInterfaceCluster cluster:clusters) {
300                        for (StructureInterface interf:cluster.getMembers()) {
301                                interf.setCluster(cluster);
302                        }
303                }
304
305                // now we sort by areas (descending) and assign ids based on that sorting
306                Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() {
307                        @Override
308                        public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) {
309                                return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
310                        }
311                });
312                int id = 1;
313                for (StructureInterfaceCluster cluster:clusters) {
314                        cluster.setId(id);
315                        id++;
316                }
317
318
319                return clusters;
320        }
321
322        @Override
323        public Iterator<StructureInterface> iterator() {
324                return list.iterator();
325        }
326
327        @Override
328        public String toString() {
329                return list.toString();
330        }
331
332        /**
333         * Calculates the interfaces for a structure using default parameters
334         * @param struc
335         * @return
336         */
337        public static StructureInterfaceList calculateInterfaces(Structure struc) {
338                CrystalBuilder builder = new CrystalBuilder(struc);
339                StructureInterfaceList interfaces = builder.getUniqueInterfaces();
340                logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces");
341                interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance
342                                Runtime.getRuntime().availableProcessors(),
343                                StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
344                interfaces.removeInterfacesBelowArea();
345                interfaces.getClusters();
346                logger.debug("Found "+interfaces.size()+" interfaces");
347                return interfaces;
348        }
349
350}