001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.contact;
022
023import java.io.Serializable;
024import java.util.ArrayList;
025import java.util.Collections;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.Optional;
030import java.util.Set;
031import java.util.TreeMap;
032
033import org.biojava.nbio.core.util.SingleLinkageClusterer;
034import org.biojava.nbio.structure.Atom;
035import org.biojava.nbio.structure.Structure;
036import org.biojava.nbio.structure.asa.AsaCalculator;
037import org.biojava.nbio.structure.xtal.CrystalBuilder;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041
042/**
043 * A list of interfaces between 2 molecules (2 sets of atoms)
044 *
045 * @author Jose Duarte
046 *
047 */
048public class StructureInterfaceList implements Serializable, Iterable<StructureInterface> {
049
050        private static final Logger logger = LoggerFactory.getLogger(StructureInterfaceList.class);
051
052        /**
053         * Default minimum area for a contact between two chains to be considered a
054         * valid interface.
055         * @see #removeInterfacesBelowArea(double);
056         */
057        public static final double DEFAULT_MINIMUM_INTERFACE_AREA = 35.0;
058        /**
059         * Default number of points to use when calculating ASAs
060         * @see #calcAsas(int, int, int)
061         */
062        public static final int DEFAULT_ASA_SPHERE_POINTS = 3000;
063        /**
064         * Default minimum size of cofactor molecule (non-chain HET atoms) that will be used
065         * @see #calcAsas(int, int, int)
066         */
067        public static final int DEFAULT_MIN_COFACTOR_SIZE = 40;
068
069        /**
070         * Any 2 interfaces with contact overlap score larger than this value
071         * will be considered to be clustered
072         */
073        public static final double DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF = 0.2;
074
075        private static final long serialVersionUID = 1L;
076
077        private List<StructureInterface> list;
078
079        private List<StructureInterfaceCluster> clusters = null;
080        private List<StructureInterfaceCluster> clustersNcs = null;
081
082        private Map<String, String> chainOrigNamesMap;
083
084        public StructureInterfaceList() {
085                this.list = new ArrayList<>();
086        }
087
088        public void add(StructureInterface interf) {
089                this.list.add(interf);
090        }
091
092        public int size() {
093                return this.list.size();
094        }
095
096        /**
097         * Gets the interface corresponding to given id.
098         * The ids go from 1 to n
099         * If {@link #sort()} was called then the order is descendent by area.
100         * @param id
101         * @return
102         */
103        public StructureInterface get(int id) {
104                return list.get(id-1);
105        }
106
107        /**
108         * Calculates ASAs for all interfaces in list, both for the unbound
109         * chains and for the complex of the two chains together.
110         * Also sorts the interfaces based on calculated BSA areas (descending).
111         *
112         * <p>Uses default parameters
113         */
114        public void calcAsas() {
115                calcAsas( DEFAULT_ASA_SPHERE_POINTS,
116                                Runtime.getRuntime().availableProcessors(),
117                                DEFAULT_MIN_COFACTOR_SIZE );
118        }
119        /**
120         * Calculates ASAs for all interfaces in list, both for the unbound
121         * chains and for the complex of the two chains together.
122         * Also sorts the interfaces based on calculated BSA areas (descending)
123         * @param nSpherePoints
124         * @param nThreads
125         * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used
126         */
127        public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
128
129                // asa/bsa calculation
130                // NOTE in principle it is more efficient to calculate asas only once per unique chain
131                // BUT! the rolling ball algorithm gives slightly different values for same molecule in different
132                // rotations (due to sampling depending on orientation of axes grid).
133                // Both NACCESS and our own implementation behave like that.
134                // That's why we calculate ASAs for each rotation-unique molecule, otherwise
135                // we get discrepancies (not very big but annoying) which lead to things like negative (small) bsa values
136
137
138                Map<String, Atom[]> uniqAsaChains = new TreeMap<>();
139                Map<String, double[]> chainAsas = new TreeMap<>();
140
141                List<StructureInterface> redundancyReducedList;
142                if (clustersNcs != null) {
143                        redundancyReducedList = new ArrayList<>();
144                        for (StructureInterfaceCluster ncsCluster : clustersNcs) {
145                                // we use the first one in list as the only one for which we calculate ASAs
146                                redundancyReducedList.add(ncsCluster.getMembers().get(0));
147                        }
148
149                } else {
150                        redundancyReducedList = list;
151                }
152
153                // first we gather rotation-unique chains (in terms of AU id and transform id)
154                for (StructureInterface interf:redundancyReducedList) {
155                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
156                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
157
158                        uniqAsaChains.put(molecId1, interf.getFirstAtomsForAsa(cofactorSizeToUse));
159                        uniqAsaChains.put(molecId2, interf.getSecondAtomsForAsa(cofactorSizeToUse));
160                }
161
162                logger.debug("Will calculate uncomplexed ASA for {} orientation-unique chains.", uniqAsaChains.size());
163
164                long start = System.currentTimeMillis();
165
166                // we only need to calculate ASA for that subset (any translation of those will have same values)
167                for (String molecId:uniqAsaChains.keySet()) {
168
169                        logger.debug("Calculating uncomplexed ASA for molecId {}, with {} atoms", molecId, uniqAsaChains.get(molecId).length);
170
171                        AsaCalculator asaCalc = new AsaCalculator(uniqAsaChains.get(molecId),
172                                        AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
173
174                        double[] atomAsas = asaCalc.calculateAsas();
175
176                        chainAsas.put(molecId, atomAsas);
177
178                }
179                long end = System.currentTimeMillis();
180
181                logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0));
182
183                logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", redundancyReducedList.size());
184
185                start = System.currentTimeMillis();
186
187                // now we calculate the ASAs for each of the complexes
188                for (StructureInterface interf:redundancyReducedList) {
189
190                        String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
191                        String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
192
193                        logger.debug("Calculating complexed ASAs for interface {} between molecules {} and {}", interf.getId(), molecId1, molecId2);
194
195                        interf.setAsas(chainAsas.get(molecId1), chainAsas.get(molecId2), nSpherePoints, nThreads, cofactorSizeToUse);
196
197                }
198                end = System.currentTimeMillis();
199
200                logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", redundancyReducedList.size(), ((end-start)/1000.0));
201
202                // now let's populate the interface area value for the NCS-redundant ones from the reference interface (first one in list)
203                if (clustersNcs!=null) {
204                        if (chainOrigNamesMap==null)  {
205                                logger.warn("No chainOrigNamesMap is set. Considering NCS interfaces in same order as reference. This is likely a bug.");
206                        }
207                        for (StructureInterfaceCluster ncsCluster : clustersNcs) {
208                                StructureInterface refInterf = ncsCluster.getMembers().get(0);
209                                String refMolecId1 = refInterf.getMoleculeIds().getFirst();
210                                for (int i=1;i<ncsCluster.getMembers().size();i++) {
211                                        StructureInterface member = ncsCluster.getMembers().get(i);
212                                        member.setTotalArea(refInterf.getTotalArea());
213                                        String molecId1 = member.getMoleculeIds().getFirst();
214                                        if (areMolecIdsSameOrder(refMolecId1, molecId1)) {
215                                                // we add the reference interface GroupAsas as the GroupAsas for all other members, like that
216                                                // ResidueNumbers won't match in their chain ids, but otherwise all info is there without using a lot of memory
217                                                member.setFirstGroupAsas(refInterf.getFirstGroupAsas());
218                                                member.setSecondGroupAsas(refInterf.getSecondGroupAsas());
219                                        } else {
220                                                member.setFirstGroupAsas(refInterf.getSecondGroupAsas());
221                                                member.setSecondGroupAsas(refInterf.getFirstGroupAsas());
222                                        }
223                                }
224                        }
225                }
226
227                // finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
228                sort();
229        }
230
231        private boolean areMolecIdsSameOrder(String refMolecId, String molecId) {
232
233                if (chainOrigNamesMap==null) {
234                        // we've already warned above
235                        return true;
236                }
237
238                String refMolecIdOrig = chainOrigNamesMap.get(refMolecId);
239                String molecIdOrig = chainOrigNamesMap.get(molecId);
240
241                return (refMolecIdOrig.equals(molecIdOrig));
242        }
243
244        /**
245         * Sorts the interface list and reassigns ids based on new sorting
246         *
247         */
248        public void sort() {
249                Collections.sort(list);
250                int i=1;
251                for (StructureInterface interf:list) {
252                        interf.setId(i);
253                        i++;
254                }
255        }
256
257        /**
258         * Get the interface clusters for this StructureInterfaceList grouped by NCS-equivalence.
259         * This means that for any two interfaces in the same cluster:
260         * 1. The chains forming the first interface are NCS-copies of the chains forming the second interface, in any order.
261         * 2. Relative orientation of the chains is preserved, i.e. the contacts are identical.
262         * @return list of {@link StructureInterfaceCluster} objects.
263         * @since 5.0.0
264         */
265        public List<StructureInterfaceCluster> getClustersNcs() {
266                return clustersNcs;
267        }
268
269        /**
270         * Add an interface to the list, possibly defining it as NCS-equivalent to an interface already in the list.
271         * Used to build up the NCS clustering.
272         * @param interfaceNew
273         *          an interface to be added to the list.
274         * @param interfaceRef
275         *          interfaceNew will be added to the cluster which contains interfaceRef.
276         *          If interfaceRef is null, new cluster will be created for interfaceNew.
277         * @since 5.0.0
278         */
279        public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface interfaceRef) {
280
281                this.add(interfaceNew);
282                if (clustersNcs == null) {
283                        clustersNcs = new ArrayList<>();
284                }
285
286                if (interfaceRef == null) {
287                        StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
288                        newCluster.addMember(interfaceNew);
289                        clustersNcs.add(newCluster);
290                        return;
291                }
292
293                Optional<StructureInterfaceCluster> clusterRef =
294                                clustersNcs.stream().
295                                        filter(r->r.getMembers().stream().
296                                                anyMatch(c -> c.equals(interfaceRef))).
297                                                findFirst();
298
299                if (clusterRef.isPresent()) {
300                        clusterRef.get().addMember(interfaceNew);
301                        return;
302                }
303
304                logger.warn("The specified reference interface, if not null, should have been added to this set previously. " +
305                                "Creating new cluster and adding both interfaces. This is likely a bug.");
306                this.add(interfaceRef);
307                StructureInterfaceCluster newCluster = new StructureInterfaceCluster();
308                newCluster.addMember(interfaceRef);
309                newCluster.addMember(interfaceNew);
310                clustersNcs.add(newCluster);
311        }
312
313        /**
314         * Sets a map with mapping from NCS chain names to original chain names.
315         * Necessary when {@link #addNcsEquivalent(StructureInterface, StructureInterface)} is used and NCS equivalent
316         * interfaces exist in this list and their names need mapping when setting ASAs.
317         * @param chainOrigNamesMap a map of NCS chain name to original chain name
318         */
319        public void setChainOrigNamesMap(Map<String, String> chainOrigNamesMap) {
320                this.chainOrigNamesMap = chainOrigNamesMap;
321        }
322
323        /**
324         * Removes from this interface list all interfaces with areas
325         * below the default cutoff area.
326     * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
327     * be 0 and thus all removed.
328         * @see #DEFAULT_MINIMUM_INTERFACE_AREA
329         */
330        public void removeInterfacesBelowArea() {
331                removeInterfacesBelowArea(DEFAULT_MINIMUM_INTERFACE_AREA);
332        }
333
334        /**
335         * Removes from this interface list all interfaces with areas
336         * below the given cutoff area.
337     * Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
338     * be 0 and thus all removed.
339         * @param area the minimum interface buried surface area to keep. Interfaces below this value will be removed.
340         */
341        public void removeInterfacesBelowArea(double area) {
342
343                list.removeIf(interf -> interf.getTotalArea() < area);
344
345            if (clustersNcs != null) {
346                clustersNcs.removeIf(ncsCluster -> ncsCluster.getMembers().get(0).getTotalArea() < area);
347        }
348        }
349
350        /**
351         * Calculate the interface clusters for this StructureInterfaceList
352         * using a contact overlap score to measure the similarity of interfaces.
353         * Subsequent calls will use the cached value without recomputing the clusters.
354         * The contact overlap score cutoff to consider a pair in the same cluster is
355         * the value {@link #DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF}
356         * @return
357         */
358        public List<StructureInterfaceCluster> getClusters() {
359                return getClusters(DEFAULT_CONTACT_OVERLAP_SCORE_CLUSTER_CUTOFF);
360        }
361
362        /**
363         * Calculate the interface clusters for this StructureInterfaceList
364         * using a contact overlap score to measure the similarity of interfaces.
365         * Subsequent calls will use the cached value without recomputing the clusters.
366         * The clusters will be assigned ids by sorting descending by {@link StructureInterfaceCluster#getTotalArea()}
367         * @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
368         * clustered
369         * @return
370         */
371        public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClusterCutoff) {
372                if (clusters!=null) {
373                        return clusters;
374                }
375
376                clusters = new ArrayList<>();
377
378                // nothing to do if we have no interfaces
379                if (list.size()==0) return clusters;
380
381                double[][] matrix = new double[list.size()][list.size()];
382
383                for (int i=0;i<list.size();i++) {
384                        for (int j=i+1;j<list.size();j++) {
385                                StructureInterface iInterf = list.get(i);
386                                StructureInterface jInterf = list.get(j);
387
388                                double scoreDirect = iInterf.getContactOverlapScore(jInterf, false);
389                                double scoreInvert = iInterf.getContactOverlapScore(jInterf, true);
390
391                                double maxScore = Math.max(scoreDirect, scoreInvert);
392
393                                matrix[i][j] = maxScore;
394                        }
395
396                }
397
398                SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
399
400                Map<Integer, Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
401                for (int clusterIdx:clusteredIndices.keySet()) {
402                        List<StructureInterface> members = new ArrayList<>();
403                        for (int idx:clusteredIndices.get(clusterIdx)) {
404                                members.add(list.get(idx));
405                        }
406                        StructureInterfaceCluster cluster = new StructureInterfaceCluster();
407                        cluster.setMembers(members);
408                        double averageScore = 0.0;
409                        int countPairs = 0;
410                        for (int i=0;i<members.size();i++) {
411                int iIdx = list.indexOf(members.get(i));
412                                for (int j=i+1;j<members.size();j++) {
413                                        averageScore += matrix[iIdx][list.indexOf(members.get(j))];
414                                        countPairs++;
415                                }
416                        }
417                        if (countPairs>0) {
418                                averageScore = averageScore/countPairs;
419                        } else {
420                                // if only one interface in cluster we set the score to the maximum
421                                averageScore = 1.0;
422                        }
423                        cluster.setAverageScore(averageScore);
424                        clusters.add(cluster);
425                }
426
427                // finally we have to set the back-references in each StructureInterface
428                for (StructureInterfaceCluster cluster:clusters) {
429                        for (StructureInterface interf:cluster.getMembers()) {
430                                interf.setCluster(cluster);
431                        }
432                }
433
434                // now we sort by areas (descending) and assign ids based on that sorting
435                clusters.sort((o1, o2) -> {
436                        return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
437                });
438                int id = 1;
439                for (StructureInterfaceCluster cluster:clusters) {
440                        cluster.setId(id);
441                        id++;
442                }
443
444                return clusters;
445        }
446
447        @Override
448        public Iterator<StructureInterface> iterator() {
449                return list.iterator();
450        }
451
452        @Override
453        public String toString() {
454                return list.toString();
455        }
456
457        /**
458         * Calculates the interfaces for a structure using default parameters
459         * @param struc
460         * @return
461         */
462        public static StructureInterfaceList calculateInterfaces(Structure struc) {
463                CrystalBuilder builder = new CrystalBuilder(struc);
464                StructureInterfaceList interfaces = builder.getUniqueInterfaces();
465                logger.debug("Calculating ASA for "+interfaces.size()+" potential interfaces");
466                interfaces.calcAsas(StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS, //fewer for performance
467                                Runtime.getRuntime().availableProcessors(),
468                                StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
469                interfaces.removeInterfacesBelowArea();
470                interfaces.getClusters();
471                logger.debug("Found "+interfaces.size()+" interfaces");
472                return interfaces;
473        }
474
475}