001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.contact;
022
023import java.io.Serializable;
024import java.util.ArrayList;
025import java.util.Collections;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.Optional;
030import java.util.Set;
031import java.util.TreeMap;
032
033import org.biojava.nbio.core.util.SingleLinkageClusterer;
034import org.biojava.nbio.structure.Atom;
035import org.biojava.nbio.structure.Structure;
036import org.biojava.nbio.structure.asa.AsaCalculator;
037import org.biojava.nbio.structure.xtal.CrystalBuilder;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041
042/**
043 * A list of interfaces between 2 molecules (2 sets of atoms)
044 *
045 * @author Jose Duarte
046 *
047 */
048public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> {
049
050        private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class);
051
052        /**
053         * Default minimum area for a contact between two chains to be considered a
054         * valid interface.
055         * @see #removeInterfacesBelowArea(double);
056         */
057        public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0;
058        /**
059         * Default number of points to use when calculating ASAs
060         * @see #calcAsas(int, int, int)
061         */
062        public static final int DEFAULT_ASA_SPHERE_POINTS = 3000;
063        /**
064         * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used
065         * @see #calcAsas(int, int, int)
066         */
067        public static final int DEFAULT_MIN_COFACTOR_SIZE = 40;
068
069        /**
070         * Any 2 interfaces with contact overlap score larger than this value
071         * will be considered to be clustered
072         */
073        public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2;
074
075        private static final long serialVersionUID = 1L;
076
077        private final List<StructureInterface> list;
078
079        private List<StructureInterfaceCluster> clusters = null;
080        private List<StructureInterfaceCluster> clustersNcs = null;
081
082        private Map<String, String> chainOrigNamesMap;
083
084        public StructureInterfaceList() {
085                this.list = new ArrayList<>();
086        }
087
088        public void add(StructureInterface interf) {
089                this.list.add(interf);
090        }
091
092        public int size() {
093                return this.list.size();
094        }
095
096        public List<StructureInterface> getList() {
097                return list;
098        }
099
100        /**
101         * Gets the interface corresponding to given id.
102         * The ids go from 1 to n
103         * If {@link #sort()} was called then the order is descendent by area.
104         * @param id
105         * @return
106         */
107        public StructureInterface get(int id) {
108                return list.get(id-1);
109        }
110
111        /**
112         * Calculates ASAs for all interfaces in list, both for the unbound
113         * chains and for the complex of the two chains together.
114         * Also sorts the interfaces based on calculated BSA areas (descending).
115         *
116         * <p>Uses default parameters
117         */
118        public void calcAsas() {
119                calcAsas( DEFAULT_ASA_SPHERE_POINTS,
120                                Runtime.getRuntime().availableProcessors(),
121                                DEFAULT_MIN_COFACTOR_SIZE );
122        }
123        /**
124         * Calculates ASAs for all interfaces in list, both for the unbound
125         * chains and for the complex of the two chains together.
126         * Also sorts the interfaces based on calculated BSA areas (descending)
127         * @param nSpherePoints
128         * @param nThreads
129         * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used
130         */
131        public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
132
133                // asa/bsa calculation
134                // NOTE in principle it is more efficient to calculate asas only once per unique chain
135                // BUT! the rolling ball algorithm gives slightly different values for same molecule in different
136                // rotations (due to sampling depending on orientation of axes grid).
137                // Both NACCESS and our own implementation behave like that.
138                // That's why we calculate ASAs for each rotation-unique molecule, otherwise
139                // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values
140
141
142                Map<String, Atom[]> uniqAsaChains = new TreeMap<>();
143                Map<String, double[]> chainAsas = new TreeMap<>();
144
145                List<StructureInterface> redundancyReducedList;
146                if (clustersNcs != null) {
147                        redundancyReducedList = new ArrayList<>();
148                        for (StructureInterfaceCluster ncsCluster : clustersNcs) {
149                                // we use the first one in list as the only one for which we calculate ASAs
150                                redundancyReducedList.add(ncsCluster.getMembers().get(0));
151                        }
152
153                } else {
154                        redundancyReducedList = list;
155                }
156
157                // first we gather rotation-unique chains (in terms of AU id and transform id)
158                for (StructureInterface interf:redundancyReducedList) {
159                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
160                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
161
162                        uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse));
163                        uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse));
164                }
165
166                logger.debug("Will calculate uncomplexed ASA for {} orientation-unique chains.", uniqAsaChains.size());
167
168                long start = System.currentTimeMillis();
169
170                // we only need to calculate ASA for that subset (any translation of those will have same values)
171                for (String molecId:uniqAsaChains.keySet()) {
172
173                        logger.debug("Calculating uncomplexed ASA for molecId {}, with {} atoms", molecId, uniqAsaChains.get(molecId).length);
174
175                        AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId),
176                                        AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
177
178                        double[] atomAsas = asaCalc.calculateAsas();
179
180                        chainAsas.put(molecId, atomAsas);
181
182                }
183                long end = System.currentTimeMillis();
184
185                logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0));
186
187                logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", redundancyReducedList.size());
188
189                start = System.currentTimeMillis();
190
191                // now we calculate the ASAs for each of the complexes
192                for (StructureInterface interf:redundancyReducedList) {
193
194                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
195                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
196
197                        logger.debug("Calculating complexed ASAs for interface {} between molecules {} and {}", interf.getId(), molecId1, molecId2);
198
199                        interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse);
200
201                }
202                end = System.currentTimeMillis();
203
204                logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", redundancyReducedList.size(), ((end-start)/1000.0));
205
206                // now let's populate the interface area value for the NCS-redundant ones from the reference interface (first one in list)
207                if (clustersNcs!=null) {
208                        if (chainOrigNamesMap==null)  {
209                                logger.warn("No chainOrigNamesMap is set. Considering NCS interfaces in same order as reference. This is likely a bug.");
210                        }
211                        for (StructureInterfaceCluster ncsCluster : clustersNcs) {
212                                StructureInterface refInterf = ncsCluster.getMembers().get(0);
213                                String refMolecId1 = refInterf.getMoleculeIds().getFirst();
214                                for (int i=1;i<ncsCluster.getMembers().size();i++) {
215                                        StructureInterface member = ncsCluster.getMembers().get(i);
216                                        member.setTotalArea(refInterf.getTotalArea());
217                                        String molecId1 = member.getMoleculeIds().getFirst();
218                                        if (areMolecIdsSameOrder(refMolecId1, molecId1)) {
219                                                // we add the reference interface GroupAsas as the GroupAsas for all other members, like that
220                                                // ResidueNumbers won't match in their chain ids, but otherwise all info is there without using a lot of memory
221                                                member.setFirstGroupAsas(refInterf.getFirstGroupAsas());
222                                                member.setSecondGroupAsas(refInterf.getSecondGroupAsas());
223                                        } else {
224                                                member.setFirstGroupAsas(refInterf.getSecondGroupAsas());
225                                                member.setSecondGroupAsas(refInterf.getFirstGroupAsas());
226                                        }
227                                }
228                        }
229                }
230
231                // finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
232                sort();
233        }
234
235        private boolean areMolecIdsSameOrder(String refMolecId, String molecId) {
236
237                if (chainOrigNamesMap==null) {
238                        // we've already warned above
239                        return true;
240                }
241
242                String refMolecIdOrig = chainOrigNamesMap.get(refMolecId);
243                String molecIdOrig = chainOrigNamesMap.get(molecId);
244
245                return (refMolecIdOrig.equals(molecIdOrig));
246        }
247
248        /**
249         * Sorts the interface list and reassigns ids based on new sorting
250         *
251         */
252        public void sort() {
253                Collections.sort(list);
254                int i=1;
255                for (StructureInterface interf:list) {
256                        interf.setId(i);
257                        i++;
258                }
259        }
260
261        /**
262         * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence.
263         * This means that for any two interfaces in the same cluster:
264         * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order.
265         * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical.
266         * @return list of {@link StructureInterfaceCluster} objects.
267         * @since 5.0.0
268         */
269        public List<StructureInterfaceCluster> getClustersNcs() {
270                return clustersNcs;
271        }
272
273        /**
274         * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list.
275         * Used to build up the NCS clustering.
276         * @param interfaceNew
277         *          an interface to be added to the list.
278         * @param interfaceRef
279         *          interfaceNew will be added to the cluster which contains interfaceRef.
280         *          If interfaceRef is null, new cluster will be created for interfaceNew.
281         * @since 5.0.0
282         */
283        public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) {
284
285                this.add(interfaceNew);
286                if (clustersNcs == null) {
287                        clustersNcs = new ArrayList<>();
288                }
289
290                if (interfaceRef == null) {
291                        StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
292                        newCluster.addMember(interfaceNew);
293                        clustersNcs.add(newCluster);
294                        return;
295                }
296
297                Optional<StructureInterfaceCluster> clusterRef =
298                                clustersNcs.stream().
299                                        filter(r->r.getMembers().stream().
300                                                anyMatch(c -> c.equals(interfaceRef))).
301                                                findFirst();
302
303                if (clusterRef.isPresent()) {
304                        clusterRef.get().addMember(interfaceNew);
305                        return;
306                }
307
308                logger.warn("The specified reference interface, if not null, should have been added to this set previously. " +
309                                "Creating new cluster and adding both interfaces. This is likely a bug.");
310                this.add(interfaceRef);
311                StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
312                newCluster.addMember(interfaceRef);
313                newCluster.addMember(interfaceNew);
314                clustersNcs.add(newCluster);
315        }
316
317        /**
318         * Sets a map with mapping from NCS chain names to original chain names.
319         * Necessary when {@link #addNcsEquivalent(StructureInterface, StructureInterface)} is used and NCS equivalent
320         * interfaces exist in this list and their names need mapping when setting ASAs.
321         * @param chainOrigNamesMap a map of NCS chain name to original chain name
322         */
323        public void setChainOrigNamesMap(Map<String, String> chainOrigNamesMap) {
324                this.chainOrigNamesMap = chainOrigNamesMap;
325        }
326
327        /**
328         * Removes from this interface list all interfaces with areas
329         * below the default cutoff area.
330     * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
331     * be 0 and thus all removed.
332         * @see #DEFAULT_MINIMUM_INTERFACE_AREA
333         */
334        public void removeInterfacesBelowArea() {
335                removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA);
336        }
337
338        /**
339         * Removes from this interface list all interfaces with areas
340         * below the given cutoff area.
341     * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
342     * be 0 and thus all removed.
343         * @param area the minimum interface buried surface area to keep. Interfaces below this value will be removed.
344         */
345        public void removeInterfacesBelowArea(double area) {
346
347                list.removeIf(interf -> interf.getTotalArea() < area);
348
349            if (clustersNcs != null) {
350                clustersNcs.removeIf(ncsCluster -> ncsCluster.getMembers().get(0).getTotalArea() < area);
351        }
352        }
353
354        /**
355         * Calculate the interface clusters for this StructureInterfaceList
356         * using a contact overlap score to measure the similarity of interfaces.
357         * Subsequent calls will use the cached value without recomputing the clusters.
358         * The contact overlap score cutoff to consider a pair in the same cluster is
359         * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF}
360         * @return
361         */
362        public List<StructureInterfaceCluster> getClusters() {
363                return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF);
364        }
365
366        /**
367         * Calculate the interface clusters for this StructureInterfaceList
368         * using Jaccard contact set scores to measure the similarity of interfaces.
369         * Subsequent calls will use the cached value without recomputing the clusters.
370         * The clusters will be assigned ids by sorting descending by {@link StructureInterfaceCluster#getTotalArea()}
371         * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
372         * clustered
373         * @return
374         */
375        public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) {
376                if (clusters!=null) {
377                        return clusters;
378                }
379
380                clusters = new ArrayList<>();
381
382                // nothing to do if we have no interfaces
383                if (list.size()==0) return clusters;
384
385                logger.debug("Calculating all-vs-all Jaccard scores for {} interfaces", list.size());
386                double[][] matrix = new double[list.size()][list.size()];
387
388                for (int i=0;i<list.size();i++) {
389                        for (int j=i+1;j<list.size();j++) {
390                                StructureInterface iInterf = list.get(i);
391                                StructureInterface jInterf = list.get(j);
392
393                                double scoreDirect = iInterf.getContactOverlapScore(jInterf, false);
394                                double scoreInvert = iInterf.getContactOverlapScore(jInterf, true);
395
396                                double maxScore = Math.max(scoreDirect, scoreInvert);
397
398                                matrix[i][j] = maxScore;
399                        }
400
401                }
402
403                logger.debug("Will now cluster {} interfaces based on full all-vs-all Jaccard scores matrix", list.size());
404                SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
405
406                Map<Integer, Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
407                for (int clusterIdx:clusteredIndices.keySet()) {
408                        List<StructureInterface> members = new ArrayList<>();
409                        for (int idx:clusteredIndices.get(clusterIdx)) {
410                                members.add(list.get(idx));
411                        }
412                        StructureInterfaceCluster cluster = new StructureInterfaceCluster();
413                        cluster.setMembers(members);
414                        double averageScore = 0.0;
415                        int countPairs = 0;
416                        for (int i=0;i<members.size();i++) {
417                int iIdx = list.indexOf(members.get(i));
418                                for (int j=i+1;j<members.size();j++) {
419                                        averageScore += matrix[iIdx][list.indexOf(members.get(j))];
420                                        countPairs++;
421                                }
422                        }
423                        if (countPairs>0) {
424                                averageScore = averageScore/countPairs;
425                        } else {
426                                // if only one interface in cluster we set the score to the maximum
427                                averageScore = 1.0;
428                        }
429                        cluster.setAverageScore(averageScore);
430                        clusters.add(cluster);
431                }
432
433                // finally we have to set the back-references in each StructureInterface
434                for (StructureInterfaceCluster cluster:clusters) {
435                        for (StructureInterface interf:cluster.getMembers()) {
436                                interf.setCluster(cluster);
437                        }
438                }
439                logger.debug("Done clustering {} interfaces based on full all-vs-all Jaccard scores matrix. Found a total of {} clusters", list.size(), clusters.size());
440
441                // now we sort by areas (descending) and assign ids based on that sorting
442                clusters.sort((o1, o2) -> Double.compare(o2.getTotalArea(), o1.getTotalArea())); //note we invert so that sorting is descending
443
444                int id = 1;
445                for (StructureInterfaceCluster cluster:clusters) {
446                        cluster.setId(id);
447                        id++;
448                }
449
450                return clusters;
451        }
452
453        @Override
454        public Iterator<StructureInterface> iterator() {
455                return list.iterator();
456        }
457
458        @Override
459        public String toString() {
460                return list.toString();
461        }
462
463        /**
464         * Calculates the interfaces for a structure using default parameters
465         * @param struc
466         * @return
467         */
468        public static StructureInterfaceList calculateInterfaces(Structure struc) {
469                CrystalBuilder builder = new CrystalBuilder(struc);
470                StructureInterfaceList interfaces = builder.getUniqueInterfaces();
471                logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces");
472                interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance
473                                Runtime.getRuntime().availableProcessors(),
474                                StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
475                interfaces.removeInterfacesBelowArea();
476                interfaces.getClusters();
477                logger.debug("Found "+interfaces.size()+" interfaces");
478                return interfaces;
479        }
480
481}