001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.contact;
022
023import java.io.Serializable;
024import java.util.*;
025
026import org.biojava.nbio.core.util.SingleLinkageClusterer;
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.Structure;
029import org.biojava.nbio.structure.asa.AsaCalculator;
030import org.biojava.nbio.structure.xtal.CrystalBuilder;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034
035/**
036 * A list of interfaces between 2 molecules (2 sets of atoms)
037 *
038 * @author duarte_j
039 *
040 */
041public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> {
042
043        private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class);
044
045        /**
046         * Default minimum area for a contact between two chains to be considered a
047         * valid interface.
048         * @see #removeInterfacesBelowArea(double);
049         */
050        public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0;
051        /**
052         * Default number of points to use when calculating ASAs
053         * @see #calcAsas(int, int, int)
054         */
055        public static final int DEFAULT_ASA_SPHERE_POINTS = 3000;
056        /**
057         * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used
058         * @see #calcAsas(int, int, int)
059         */
060        public static final int DEFAULT_MIN_COFACTOR_SIZE = 40;
061
062        /**
063         * Any 2 interfaces with contact overlap score larger than this value
064         * will be considered to be clustered
065         */
066        public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2;
067
068        private static final long serialVersionUID = 1L;
069
070        private List<StructureInterface> list;
071
072        private List<StructureInterfaceCluster> clusters = null;
073        private List<StructureInterfaceCluster> clustersNcs = null;
074
075        public StructureInterfaceList() {
076                this.list = new ArrayList<>();
077        }
078
079        public void add(StructureInterface interf) {
080                this.list.add(interf);
081        }
082
083        public int size() {
084                return this.list.size();
085        }
086
087        /**
088         * Gets the interface corresponding to given id.
089         * The ids go from 1 to n
090         * If {@link #sort()} was called then the order is descendent by area.
091         * @param id
092         * @return
093         */
094        public StructureInterface get(int id) {
095                return list.get(id-1);
096        }
097
098        /**
099         * Calculates ASAs for all interfaces in list, both for the unbound
100         * chains and for the complex of the two chains together.
101         * Also sorts the interfaces based on calculated BSA areas (descending).
102         *
103         * <p>Uses default parameters
104         */
105        public void calcAsas() {
106                calcAsas( DEFAULT_ASA_SPHERE_POINTS,
107                                Runtime.getRuntime().availableProcessors(),
108                                DEFAULT_MIN_COFACTOR_SIZE );
109        }
110        /**
111         * Calculates ASAs for all interfaces in list, both for the unbound
112         * chains and for the complex of the two chains together.
113         * Also sorts the interfaces based on calculated BSA areas (descending)
114         * @param nSpherePoints
115         * @param nThreads
116         * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used
117         */
118        public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
119
120                // asa/bsa calculation
121                // NOTE in principle it is more efficient to calculate asas only once per unique chain
122                // BUT! the rolling ball algorithm gives slightly different values for same molecule in different
123                // rotations (due to sampling depending on orientation of axes grid).
124                // Both NACCESS and our own implementation behave like that.
125                // That's why we calculate ASAs for each rotation-unique molecule, otherwise
126                // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values
127
128
129                Map<String, Atom[]> uniqAsaChains = new TreeMap<String, Atom[]>();
130                Map<String, double[]> chainAsas = new TreeMap<String, double[]>();
131
132                // first we gather rotation-unique chains (in terms of AU id and transform id)
133                for (StructureInterface interf:list) {
134                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
135                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
136
137                        uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse));
138                        uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse));
139                }
140
141                long start = System.currentTimeMillis();
142
143                // we only need to calculate ASA for that subset (any translation of those will have same values)
144                for (String molecId:uniqAsaChains.keySet()) {
145
146                        AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId),
147                                        AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
148
149                        double[] atomAsas = asaCalc.calculateAsas();
150
151                        chainAsas.put(molecId, atomAsas);
152
153                }
154                long end = System.currentTimeMillis();
155
156                logger.debug("Calculated uncomplexed ASA for "+uniqAsaChains.size()+" orientation-unique chains. "
157                                        + "Time: "+((end-start)/1000.0)+" s");
158
159                start = System.currentTimeMillis();
160
161                // now we calculate the ASAs for each of the complexes
162                for (StructureInterface interf:list) {
163
164                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
165                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
166
167                        interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse);
168
169                }
170                end = System.currentTimeMillis();
171
172                logger.debug("Calculated complexes ASA for "+list.size()+" pairwise complexes. "
173                                        + "Time: "+((end-start)/1000.0)+" s");
174
175
176                // finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
177                sort();
178        }
179
180        /**
181         * Sorts the interface list and reassigns ids based on new sorting
182         */
183        public void sort() {
184                Collections.sort(list);
185                int i=1;
186                for (StructureInterface interf:list) {
187                        interf.setId(i);
188                        i++;
189                }
190        }
191
192        /**
193         * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence.
194         * This means that for any two interfaces in the same cluster:
195         * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order.
196         * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical.
197         * @return list of {@link StructureInterfaceCluster} objects.
198         * @since 5.0.0
199         */
200        public List<StructureInterfaceCluster> getClustersNcs() {
201                return clustersNcs;
202        }
203
204        /**
205         * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list.
206         * Used to build up the NCS clustering.
207         * @param interfaceNew
208         *          an interface to be added to the list.
209         * @param interfaceRef
210         *          interfaceNew will be added to the cluster which contains interfaceRef.
211         *          If interfaceRef is null, new cluster will be created for interfaceNew.
212         * @since 5.0.0
213         */
214        public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) {
215
216                this.add(interfaceNew);
217                if (clustersNcs == null) {
218                        clustersNcs = new ArrayList<>();
219                }
220
221                if (interfaceRef == null) {
222                        StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
223                        newCluster.addMember(interfaceNew);
224                        clustersNcs.add(newCluster);
225                        return;
226                }
227
228                Optional<StructureInterfaceCluster> clusterRef =
229                                clustersNcs.stream().
230                                        filter(r->r.getMembers().stream().
231                                                anyMatch(c -> c.equals(interfaceRef))).
232                                                findFirst();
233
234                if (clusterRef.isPresent()) {
235                        clusterRef.get().addMember(interfaceNew);
236                        return;
237                }
238
239                logger.warn("The specified reference interface, if not null, should have been added to this set previously. " +
240                                "Creating new cluster and adding both interfaces. This is likely a bug.");
241                this.add(interfaceRef);
242                StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
243                newCluster.addMember(interfaceRef);
244                newCluster.addMember(interfaceNew);
245                clustersNcs.add(newCluster);
246        }
247
248        /**
249         * Removes from this interface list all interfaces with areas
250         * below the default cutoff area
251         * @see #DEFAULT_MINIMUM_INTERFACE_AREA
252         */
253        public void removeInterfacesBelowArea() {
254                removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA);
255        }
256
257        /**
258         * Removes from this interface list all interfaces with areas
259         * below the given cutoff area
260         * @param area
261         */
262        public void removeInterfacesBelowArea(double area) {
263                Iterator<StructureInterface> it = iterator();
264                while (it.hasNext()) {
265                        StructureInterface interf = it.next();
266                        if (interf.getTotalArea()<area) {
267                                it.remove();
268                        }
269                }
270        }
271
272        /**
273         * Calculate the interface clusters for this StructureInterfaceList
274         * using a contact overlap score to measure the similarity of interfaces.
275         * Subsequent calls will use the cached value without recomputing the clusters.
276         * The contact overlap score cutoff to consider a pair in the same cluster is
277         * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF}
278         * @return
279         */
280        public List<StructureInterfaceCluster> getClusters() {
281                return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF);
282        }
283
284        /**
285         * Calculate the interface clusters for this StructureInterfaceList
286         * using a contact overlap score to measure the similarity of interfaces.
287         * Subsequent calls will use the cached value without recomputing the clusters.
288         * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
289         * clustered
290         * @return
291         */
292        public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) {
293                if (clusters!=null) {
294                        return clusters;
295                }
296
297                clusters = new ArrayList<StructureInterfaceCluster>();
298
299                // nothing to do if we have no interfaces
300                if (list.size()==0) return clusters;
301
302                double[][] matrix = new double[list.size()][list.size()];
303
304                for (int i=0;i<list.size();i++) {
305                        for (int j=i+1;j<list.size();j++) {
306                                StructureInterface iInterf = list.get(i);
307                                StructureInterface jInterf = list.get(j);
308
309                                double scoreDirect = iInterf.getContactOverlapScore(jInterf, false);
310                                double scoreInvert = iInterf.getContactOverlapScore(jInterf, true);
311
312                                double maxScore = Math.max(scoreDirect, scoreInvert);
313
314                                matrix[i][j] = maxScore;
315                        }
316
317                }
318
319                SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
320
321                Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
322                for (int clusterIdx:clusteredIndices.keySet()) {
323                        List<StructureInterface> members = new ArrayList<StructureInterface>();
324                        for (int idx:clusteredIndices.get(clusterIdx)) {
325                                members.add(list.get(idx));
326                        }
327                        StructureInterfaceCluster cluster = new StructureInterfaceCluster();
328                        cluster.setMembers(members);
329                        double averageScore = 0.0;
330                        int countPairs = 0;
331                        for (int i=0;i<members.size();i++) {
332                                for (int j=i+1;j<members.size();j++) {
333                                        averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1];
334                                        countPairs++;
335                                }
336                        }
337                        if (countPairs>0) {
338                                averageScore = averageScore/countPairs;
339                        } else {
340                                // if only one interface in cluster we set the score to the maximum
341                                averageScore = 1.0;
342                        }
343                        cluster.setAverageScore(averageScore);
344                        clusters.add(cluster);
345                }
346
347                // finally we have to set the back-references in each StructureInterface
348                for (StructureInterfaceCluster cluster:clusters) {
349                        for (StructureInterface interf:cluster.getMembers()) {
350                                interf.setCluster(cluster);
351                        }
352                }
353
354                // now we sort by areas (descending) and assign ids based on that sorting
355                Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() {
356                        @Override
357                        public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) {
358                                return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
359                        }
360                });
361                int id = 1;
362                for (StructureInterfaceCluster cluster:clusters) {
363                        cluster.setId(id);
364                        id++;
365                }
366
367
368                return clusters;
369        }
370
371        @Override
372        public Iterator<StructureInterface> iterator() {
373                return list.iterator();
374        }
375
376        @Override
377        public String toString() {
378                return list.toString();
379        }
380
381        /**
382         * Calculates the interfaces for a structure using default parameters
383         * @param struc
384         * @return
385         */
386        public static StructureInterfaceList calculateInterfaces(Structure struc) {
387                CrystalBuilder builder = new CrystalBuilder(struc);
388                StructureInterfaceList interfaces = builder.getUniqueInterfaces();
389                logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces");
390                interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance
391                                Runtime.getRuntime().availableProcessors(),
392                                StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
393                interfaces.removeInterfacesBelowArea();
394                interfaces.getClusters();
395                logger.debug("Found "+interfaces.size()+" interfaces");
396                return interfaces;
397        }
398
399}