001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.contact;
022
023import java.io.Serializable;
024import java.util.*;
025
026import org.biojava.nbio.core.util.SingleLinkageClusterer;
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.Structure;
029import org.biojava.nbio.structure.asa.AsaCalculator;
030import org.biojava.nbio.structure.xtal.CrystalBuilder;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034
035/**
036 * A list of interfaces between 2 molecules (2 sets of atoms)
037 *
038 * @author Jose Duarte
039 *
040 */
041public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> {
042
043        private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class);
044
045        /**
046         * Default minimum area for a contact between two chains to be considered a
047         * valid interface.
048         * @see #removeInterfacesBelowArea(double);
049         */
050        public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0;
051        /**
052         * Default number of points to use when calculating ASAs
053         * @see #calcAsas(int, int, int)
054         */
055        public static final int DEFAULT_ASA_SPHERE_POINTS = 3000;
056        /**
057         * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used
058         * @see #calcAsas(int, int, int)
059         */
060        public static final int DEFAULT_MIN_COFACTOR_SIZE = 40;
061
062        /**
063         * Any 2 interfaces with contact overlap score larger than this value
064         * will be considered to be clustered
065         */
066        public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2;
067
068        private static final long serialVersionUID = 1L;
069
070        private List<StructureInterface> list;
071
072        private List<StructureInterfaceCluster> clusters = null;
073        private List<StructureInterfaceCluster> clustersNcs = null;
074
075        public StructureInterfaceList() {
076                this.list = new ArrayList<>();
077        }
078
079        public void add(StructureInterface interf) {
080                this.list.add(interf);
081        }
082
083        public int size() {
084                return this.list.size();
085        }
086
087        /**
088         * Gets the interface corresponding to given id.
089         * The ids go from 1 to n
090         * If {@link #sort()} was called then the order is descendent by area.
091         * @param id
092         * @return
093         */
094        public StructureInterface get(int id) {
095                return list.get(id-1);
096        }
097
098        /**
099         * Calculates ASAs for all interfaces in list, both for the unbound
100         * chains and for the complex of the two chains together.
101         * Also sorts the interfaces based on calculated BSA areas (descending).
102         *
103         * <p>Uses default parameters
104         */
105        public void calcAsas() {
106                calcAsas( DEFAULT_ASA_SPHERE_POINTS,
107                                Runtime.getRuntime().availableProcessors(),
108                                DEFAULT_MIN_COFACTOR_SIZE );
109        }
110        /**
111         * Calculates ASAs for all interfaces in list, both for the unbound
112         * chains and for the complex of the two chains together.
113         * Also sorts the interfaces based on calculated BSA areas (descending)
114         * @param nSpherePoints
115         * @param nThreads
116         * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used
117         */
118        public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
119
120                // asa/bsa calculation
121                // NOTE in principle it is more efficient to calculate asas only once per unique chain
122                // BUT! the rolling ball algorithm gives slightly different values for same molecule in different
123                // rotations (due to sampling depending on orientation of axes grid).
124                // Both NACCESS and our own implementation behave like that.
125                // That's why we calculate ASAs for each rotation-unique molecule, otherwise
126                // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values
127
128
129                Map<String, Atom[]> uniqAsaChains = new TreeMap<>();
130                Map<String, double[]> chainAsas = new TreeMap<>();
131
132                // first we gather rotation-unique chains (in terms of AU id and transform id)
133                for (StructureInterface interf:list) {
134                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
135                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
136
137                        uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse));
138                        uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse));
139                }
140
141                logger.debug("Will calculate uncomplexed ASA for {} orientation-unique chains.", uniqAsaChains.size());
142
143                long start = System.currentTimeMillis();
144
145                // we only need to calculate ASA for that subset (any translation of those will have same values)
146                for (String molecId:uniqAsaChains.keySet()) {
147
148                        logger.debug("Calculating uncomplexed ASA for molecId {}, with {} atoms", molecId, uniqAsaChains.get(molecId).length);
149
150                        AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId),
151                                        AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
152
153                        double[] atomAsas = asaCalc.calculateAsas();
154
155                        chainAsas.put(molecId, atomAsas);
156
157                }
158                long end = System.currentTimeMillis();
159
160                logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0));
161
162                logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", list.size());
163
164                start = System.currentTimeMillis();
165
166                // now we calculate the ASAs for each of the complexes
167                for (StructureInterface interf:list) {
168
169                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
170                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
171
172                        logger.debug("Calculating complexed ASAs for interface {} between molecules {} and {}", interf.getId(), molecId1, molecId2);
173
174                        interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse);
175
176                }
177                end = System.currentTimeMillis();
178
179                logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", list.size(), ((end-start)/1000.0));
180
181
182                // finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
183                sort();
184        }
185
186        /**
187         * Sorts the interface list and reassigns ids based on new sorting
188         */
189        public void sort() {
190                Collections.sort(list);
191                int i=1;
192                for (StructureInterface interf:list) {
193                        interf.setId(i);
194                        i++;
195                }
196        }
197
198        /**
199         * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence.
200         * This means that for any two interfaces in the same cluster:
201         * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order.
202         * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical.
203         * @return list of {@link StructureInterfaceCluster} objects.
204         * @since 5.0.0
205         */
206        public List<StructureInterfaceCluster> getClustersNcs() {
207                return clustersNcs;
208        }
209
210        /**
211         * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list.
212         * Used to build up the NCS clustering.
213         * @param interfaceNew
214         *          an interface to be added to the list.
215         * @param interfaceRef
216         *          interfaceNew will be added to the cluster which contains interfaceRef.
217         *          If interfaceRef is null, new cluster will be created for interfaceNew.
218         * @since 5.0.0
219         */
220        public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) {
221
222                this.add(interfaceNew);
223                if (clustersNcs == null) {
224                        clustersNcs = new ArrayList<>();
225                }
226
227                if (interfaceRef == null) {
228                        StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
229                        newCluster.addMember(interfaceNew);
230                        clustersNcs.add(newCluster);
231                        return;
232                }
233
234                Optional<StructureInterfaceCluster> clusterRef =
235                                clustersNcs.stream().
236                                        filter(r->r.getMembers().stream().
237                                                anyMatch(c -> c.equals(interfaceRef))).
238                                                findFirst();
239
240                if (clusterRef.isPresent()) {
241                        clusterRef.get().addMember(interfaceNew);
242                        return;
243                }
244
245                logger.warn("The specified reference interface, if not null, should have been added to this set previously. " +
246                                "Creating new cluster and adding both interfaces. This is likely a bug.");
247                this.add(interfaceRef);
248                StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
249                newCluster.addMember(interfaceRef);
250                newCluster.addMember(interfaceNew);
251                clustersNcs.add(newCluster);
252        }
253
254        /**
255         * Removes from this interface list all interfaces with areas
256         * below the default cutoff area
257         * @see #DEFAULT_MINIMUM_INTERFACE_AREA
258         */
259        public void removeInterfacesBelowArea() {
260                removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA);
261        }
262
263        /**
264         * Removes from this interface list all interfaces with areas
265         * below the given cutoff area
266         * @param area
267         */
268        public void removeInterfacesBelowArea(double area) {
269                Iterator<StructureInterface> it = iterator();
270                while (it.hasNext()) {
271                        StructureInterface interf = it.next();
272                        if (interf.getTotalArea()<area) {
273                                it.remove();
274                        }
275                }
276        }
277
278        /**
279         * Calculate the interface clusters for this StructureInterfaceList
280         * using a contact overlap score to measure the similarity of interfaces.
281         * Subsequent calls will use the cached value without recomputing the clusters.
282         * The contact overlap score cutoff to consider a pair in the same cluster is
283         * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF}
284         * @return
285         */
286        public List<StructureInterfaceCluster> getClusters() {
287                return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF);
288        }
289
290        /**
291         * Calculate the interface clusters for this StructureInterfaceList
292         * using a contact overlap score to measure the similarity of interfaces.
293         * Subsequent calls will use the cached value without recomputing the clusters.
294         * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
295         * clustered
296         * @return
297         */
298        public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) {
299                if (clusters!=null) {
300                        return clusters;
301                }
302
303                clusters = new ArrayList<StructureInterfaceCluster>();
304
305                // nothing to do if we have no interfaces
306                if (list.size()==0) return clusters;
307
308                double[][] matrix = new double[list.size()][list.size()];
309
310                for (int i=0;i<list.size();i++) {
311                        for (int j=i+1;j<list.size();j++) {
312                                StructureInterface iInterf = list.get(i);
313                                StructureInterface jInterf = list.get(j);
314
315                                double scoreDirect = iInterf.getContactOverlapScore(jInterf, false);
316                                double scoreInvert = iInterf.getContactOverlapScore(jInterf, true);
317
318                                double maxScore = Math.max(scoreDirect, scoreInvert);
319
320                                matrix[i][j] = maxScore;
321                        }
322
323                }
324
325                SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
326
327                Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
328                for (int clusterIdx:clusteredIndices.keySet()) {
329                        List<StructureInterface> members = new ArrayList<StructureInterface>();
330                        for (int idx:clusteredIndices.get(clusterIdx)) {
331                                members.add(list.get(idx));
332                        }
333                        StructureInterfaceCluster cluster = new StructureInterfaceCluster();
334                        cluster.setMembers(members);
335                        double averageScore = 0.0;
336                        int countPairs = 0;
337                        for (int i=0;i<members.size();i++) {
338                                for (int j=i+1;j<members.size();j++) {
339                                        averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1];
340                                        countPairs++;
341                                }
342                        }
343                        if (countPairs>0) {
344                                averageScore = averageScore/countPairs;
345                        } else {
346                                // if only one interface in cluster we set the score to the maximum
347                                averageScore = 1.0;
348                        }
349                        cluster.setAverageScore(averageScore);
350                        clusters.add(cluster);
351                }
352
353                // finally we have to set the back-references in each StructureInterface
354                for (StructureInterfaceCluster cluster:clusters) {
355                        for (StructureInterface interf:cluster.getMembers()) {
356                                interf.setCluster(cluster);
357                        }
358                }
359
360                // now we sort by areas (descending) and assign ids based on that sorting
361                Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() {
362                        @Override
363                        public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) {
364                                return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
365                        }
366                });
367                int id = 1;
368                for (StructureInterfaceCluster cluster:clusters) {
369                        cluster.setId(id);
370                        id++;
371                }
372
373
374                return clusters;
375        }
376
377        @Override
378        public Iterator<StructureInterface> iterator() {
379                return list.iterator();
380        }
381
382        @Override
383        public String toString() {
384                return list.toString();
385        }
386
387        /**
388         * Calculates the interfaces for a structure using default parameters
389         * @param struc
390         * @return
391         */
392        public static StructureInterfaceList calculateInterfaces(Structure struc) {
393                CrystalBuilder builder = new CrystalBuilder(struc);
394                StructureInterfaceList interfaces = builder.getUniqueInterfaces();
395                logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces");
396                interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance
397                                Runtime.getRuntime().availableProcessors(),
398                                StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
399                interfaces.removeInterfacesBelowArea();
400                interfaces.getClusters();
401                logger.debug("Found "+interfaces.size()+" interfaces");
402                return interfaces;
403        }
404
405}