001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.quaternary;
022
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.HashMap;
026import java.util.List;
027import java.util.Map;
028import java.util.Map.Entry;
029
030import javax.vecmath.Matrix4d;
031
032import org.biojava.nbio.structure.Atom;
033import org.biojava.nbio.structure.Calc;
034import org.biojava.nbio.structure.Structure;
035import org.biojava.nbio.structure.StructureException;
036import org.biojava.nbio.structure.StructureTools;
037import org.biojava.nbio.structure.align.multiple.Block;
038import org.biojava.nbio.structure.align.multiple.BlockImpl;
039import org.biojava.nbio.structure.align.multiple.BlockSet;
040import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
041import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
042import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
043import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
044import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
045import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer;
046import org.biojava.nbio.structure.cluster.Subunit;
047import org.biojava.nbio.structure.cluster.SubunitCluster;
048import org.biojava.nbio.structure.cluster.SubunitClusterer;
049import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
050import org.biojava.nbio.structure.cluster.SubunitExtractor;
051import org.biojava.nbio.structure.contact.Pair;
052import org.biojava.nbio.structure.geometry.SuperPositions;
053import org.biojava.nbio.structure.geometry.UnitQuaternions;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057/**
058 * Quaternary Structure Alignment (QS-Align). The algorithm takes as input two
059 * protein structures at the quaternary structure level (multiple chains or
060 * subunits) and calculates the equivalent subunit matching and a residue-based
061 * alignment, together with usual alignment quality scores.
062 *
063 * @author Aleix Lafita
064 * @since 5.0.0
065 *
066 */
067public class QsAlign {
068
069        private static final Logger logger = LoggerFactory.getLogger(QsAlign.class);
070
071        public static QsAlignResult align(Structure s1, Structure s2,
072                        SubunitClustererParameters cParams, QsAlignParameters aParams)
073                        throws StructureException {
074                return align(
075                                SubunitExtractor.extractSubunits(s1,
076                                                cParams.getAbsoluteMinimumSequenceLength(),
077                                                cParams.getMinimumSequenceLengthFraction(),
078                                                cParams.getMinimumSequenceLength()),
079                                SubunitExtractor.extractSubunits(s2,
080                                                cParams.getAbsoluteMinimumSequenceLength(),
081                                                cParams.getMinimumSequenceLengthFraction(),
082                                                cParams.getMinimumSequenceLength()), cParams, aParams);
083        }
084
085        public static QsAlignResult align(List<Subunit> s1, List<Subunit> s2,
086                        SubunitClustererParameters cParams, QsAlignParameters aParams)
087                        throws StructureException {
088
089                QsAlignResult result = new QsAlignResult(s1, s2);
090
091                // SETP 1: cluster each group of subunits O(N^2*L^2) - intra
092
093                List<SubunitCluster> c1 = SubunitClusterer.cluster(s1, cParams).getClusters();
094                List<SubunitCluster> c2 = SubunitClusterer.cluster(s2, cParams).getClusters();
095
096                // STEP 2: match each subunit cluster between groups O(N^2*L^2) - inter
097                Map<Integer, Integer> clusterMap = new HashMap<Integer, Integer>();
098                for (int i = 0; i < c1.size(); i++) {
099                        for (int j = 0; j < c2.size(); j++) {
100
101                                if (clusterMap.keySet().contains(i))
102                                        break;
103                                if (clusterMap.values().contains(j))
104                                        continue;
105
106                                // Use structural alignment to match the subunit clusters
107                                if (c1.get(i).mergeStructure(c2.get(j),cParams)) {
108                                        clusterMap.put(i, j);
109                                }
110                        }
111                }
112
113                logger.info("Cluster Map: " + clusterMap.toString());
114                result.setClusters(c1);
115
116                // STEP 3: Align the assemblies for each cluster match O(N^2*L)
117                for (int globalKey : clusterMap.keySet()) {
118
119                        // Obtain the clusters
120                        SubunitCluster clust1 = c1.get(globalKey);
121                        SubunitCluster clust2 = c2.get(clusterMap.get(globalKey));
122
123                        // Take a cluster match as reference
124                        int index1 = 0;
125                        int index2 = clust1.size() - clust2.size();
126                        Map<Integer, Integer> subunitMap = new HashMap<Integer, Integer>();
127                        subunitMap.put(index1, index2);
128
129                        // Map cluster id to their subunit matching
130                        Map<Integer, Map<Integer, Integer>> clustSubunitMap = new HashMap<Integer, Map<Integer, Integer>>();
131                        clustSubunitMap.put(globalKey, subunitMap);
132
133                        // Change order of key set so that globalKey is first
134                        List<Integer> keySet = new ArrayList<Integer>(clusterMap.keySet());
135                        keySet.remove((Integer) globalKey);
136                        keySet.add(0, globalKey);
137
138                        for (int key : clusterMap.keySet()) {
139
140                                // Recover subunitMap if it is the reference, new one otherwise
141                                if (key == globalKey)
142                                        subunitMap = clustSubunitMap.get(key);
143                                else
144                                        subunitMap = new HashMap<Integer, Integer>();
145
146                                // Obtain the clusters of each subunit group
147                                clust1 = c1.get(key);
148                                clust2 = c2.get(clusterMap.get(key));
149
150                                // Get the initial subunit indices of each group
151                                index1 = 0;
152                                index2 = clust1.size() - clust2.size();
153
154                                for (int i = 0; i < index2; i++) {
155                                        for (int j = index2; j < clust1.size(); j++) {
156
157                                                if (subunitMap.keySet().contains(i))
158                                                        break;
159                                                if (subunitMap.values().contains(j))
160                                                        continue;
161
162                                                // Obtain cumulative transformation matrix
163                                                Matrix4d transform = getTransformForClusterSubunitMap(
164                                                                c1, clustSubunitMap);
165
166                                                // Obtain Atom arrays of the subunit pair to match
167                                                Atom[] atoms1 = clust1.getAlignedAtomsSubunit(i);
168                                                Atom[] atoms2 = clust1.getAlignedAtomsSubunit(j);
169
170                                                // Obtain centroids and transform the second
171                                                Atom centr1 = Calc.getCentroid(atoms1);
172                                                Atom centr2 = Calc.getCentroid(atoms2);
173                                                Calc.transform(centr2, transform);
174
175                                                // 1- Check that the distance fulfills maximum
176                                                double dCentroid = Calc.getDistance(centr1, centr2);
177                                                if (dCentroid > aParams.getdCutoff()) {
178                                                        logger.debug(String.format("Subunit matching %d "
179                                                                        + "vs %d of cluster %d could not be "
180                                                                        + "matched, because centroid distance is "
181                                                                        + "%.2f", index1, index2, key, dCentroid));
182                                                        continue;
183                                                }
184
185                                                // Transform coordinates of second
186                                                Atom[] atoms2c = StructureTools.cloneAtomArray(atoms2);
187                                                Calc.transform(atoms2c, transform);
188
189                                                // 2- Check the orientation metric condition
190                                                double qOrient = UnitQuaternions.orientationAngle(
191                                                                Calc.atomsToPoints(atoms1),
192                                                                Calc.atomsToPoints(atoms2c), false);
193                                                qOrient = Math.min(Math.abs(2*Math.PI - qOrient), qOrient);
194                                                if (qOrient > aParams.getMaxOrientationAngle()) {
195                                                        logger.debug(String.format("Subunit matching %d "
196                                                                        + "vs %d of cluster %d could not be "
197                                                                        + "matched, because orientation metric is "
198                                                                        + "%.2f", i, j, key, qOrient));
199                                                        continue;
200                                                }
201
202                                                // 3- Check the RMSD condition
203                                                double rmsd = Calc.rmsd(atoms1, atoms2c);
204                                                if (rmsd > aParams.getMaxRmsd()) {
205                                                        logger.debug(String.format("Subunit matching %d "
206                                                                        + "vs %d of cluster %d could not be "
207                                                                        + "matched, because RMSD is %.2f", i,
208                                                                        j, key, rmsd));
209                                                        continue;
210                                                }
211
212                                                logger.info(String.format("Subunit matching %d vs %d"
213                                                                + " of cluster %d with centroid distance %.2f"
214                                                                + ", orientation metric %.2f and RMSD %.2f",
215                                                                i, j, key, dCentroid, qOrient, rmsd));
216
217                                                subunitMap.put(i, j);
218                                        }
219                                }
220
221                                clustSubunitMap.put(key, subunitMap);
222                        }
223
224                        logger.info("Cluster Subunit Map: " + clustSubunitMap.toString());
225
226                        // Unfold the nested map into subunit map and alignment
227                        subunitMap = new HashMap<Integer, Integer>();
228                        List<Integer> alignRes1 = new ArrayList<Integer>();
229                        List<Integer> alignRes2 = new ArrayList<Integer>();
230                        List<Atom> atomArray1 = new ArrayList<Atom>();
231                        List<Atom> atomArray2 = new ArrayList<Atom>();
232
233                        for (int key : clustSubunitMap.keySet()) {
234
235                                // Obtain the cluster and the alignment in it
236                                SubunitCluster cluster = c1.get(key);
237                                List<List<Integer>> clusterEqrs = cluster
238                                                .getMultipleAlignment().getBlock(0).getAlignRes();
239
240                                for (Entry<Integer, Integer> pair : clustSubunitMap.get(key)
241                                                .entrySet()) {
242
243                                        int i = pair.getKey();
244                                        int j = pair.getValue();
245
246                                        // Obtain the indices of the original Subunit Lists
247                                        int orig1 = s1.indexOf(cluster.getSubunits().get(i));
248                                        int orig2 = s2.indexOf(cluster.getSubunits().get(j));
249
250                                        // Append rescaled aligned residue indices
251                                        for (Integer eqr : clusterEqrs.get(i))
252                                                alignRes1.add(eqr + atomArray1.size());
253                                        for (Integer eqr : clusterEqrs.get(j))
254                                                alignRes2.add(eqr + atomArray2.size());
255
256                                        // Apend atoms to the arrays
257                                        atomArray1.addAll(Arrays.asList(s1.get(orig1)
258                                                        .getRepresentativeAtoms()));
259                                        atomArray2.addAll(Arrays.asList(s2.get(orig2)
260                                                        .getRepresentativeAtoms()));
261
262                                        subunitMap.put(orig1, orig2);
263                                }
264                        }
265
266                        // Evaluate the goodness of the match with an alignment object
267                        MultipleAlignment msa = new MultipleAlignmentImpl();
268                        msa.setEnsemble(new MultipleAlignmentEnsembleImpl());
269                        msa.getEnsemble().setAtomArrays(
270                                        Arrays.asList(new Atom[][] {
271                                                        atomArray1.toArray(new Atom[atomArray1.size()]),
272                                                        atomArray2.toArray(new Atom[atomArray2.size()]) }));
273
274                        // Fill in the alignment information
275                        BlockSet bs = new BlockSetImpl(msa);
276                        Block b = new BlockImpl(bs);
277                        List<List<Integer>> alignRes = new ArrayList<List<Integer>>(2);
278                        alignRes.add(alignRes1);
279                        alignRes.add(alignRes2);
280                        b.setAlignRes(alignRes);
281
282                        // Fill in the transformation matrices
283                        new ReferenceSuperimposer().superimpose(msa);
284
285                        // Calculate some scores
286                        MultipleAlignmentScorer.calculateScores(msa);
287
288                        // If it is the best match found so far store it
289                        if (subunitMap.size() > result.getSubunitMap().size()) {
290                                result.setSubunitMap(subunitMap);
291                                result.setAlignment(msa);
292                                logger.info("Better result found: " + result.toString());
293                        } else if (subunitMap.size() == result.getSubunitMap().size()) {
294                                if (result.getAlignment() == null) {
295                                        result.setSubunitMap(subunitMap);
296                                        result.setAlignment(msa);
297                                } else if (msa.getScore(MultipleAlignmentScorer.RMSD) < result
298                                                .getRmsd()) {
299                                        result.setSubunitMap(subunitMap);
300                                        result.setAlignment(msa);
301                                        logger.info("Better result found: " + result.toString());
302                                }
303                        }
304
305                }
306
307                return result;
308        }
309
310        /**
311         * Returns a pair of Atom arrays corresponding to the alignment of subunit
312         * matchings, in order of appearance. Superposition of the two Atom sets
313         * gives the transformation of the complex.
314         * <p>
315         * Utility method to cumulative calculate the alignment Atoms.
316         *
317         * @param clusters
318         *            List of SubunitClusters
319         * @param clusterSubunitMap
320         *            map from cluster id to subunit matching
321         * @return pair of atom arrays to be superposed
322         */
323        private static Pair<Atom[]> getAlignedAtomsForClusterSubunitMap(
324                        List<SubunitCluster> clusters,
325                        Map<Integer, Map<Integer, Integer>> clusterSubunitMap) {
326
327                List<Atom> atomArray1 = new ArrayList<Atom>();
328                List<Atom> atomArray2 = new ArrayList<Atom>();
329
330                // For each cluster of subunits
331                for (int key : clusterSubunitMap.keySet()) {
332
333                        // Obtain the cluster and the alignment in it
334                        SubunitCluster cluster = clusters.get(key);
335
336                        // For each subunit matching in the cluster
337                        for (Entry<Integer, Integer> pair : clusterSubunitMap.get(key)
338                                        .entrySet()) {
339
340                                int i = pair.getKey();
341                                int j = pair.getValue();
342
343                                // Apend atoms to the arrays
344                                atomArray1.addAll(Arrays.asList(cluster
345                                                .getAlignedAtomsSubunit(i)));
346                                atomArray2.addAll(Arrays.asList(cluster
347                                                .getAlignedAtomsSubunit(j)));
348                        }
349
350                }
351                return new Pair<Atom[]>(
352                                atomArray1.toArray(new Atom[atomArray1.size()]),
353                                atomArray2.toArray(new Atom[atomArray2.size()]));
354        }
355
356        /**
357         * Returns the transformation matrix corresponding to the alignment of
358         * subunit matchings.
359         * <p>
360         * Utility method to cumulative calculate the alignment transformation.
361         *
362         * @param clusters
363         *            List of SubunitClusters
364         * @param clusterSubunitMap
365         *            map from cluster id to subunit matching
366         * @return transformation matrix
367         * @throws StructureException
368         */
369        private static Matrix4d getTransformForClusterSubunitMap(
370                        List<SubunitCluster> clusters,
371                        Map<Integer, Map<Integer, Integer>> clusterSubunitMap)
372                        throws StructureException {
373
374                Pair<Atom[]> pair = getAlignedAtomsForClusterSubunitMap(clusters,
375                                clusterSubunitMap);
376
377                return SuperPositions.superpose(Calc.atomsToPoints(pair.getFirst()),
378                                Calc.atomsToPoints(pair.getSecond()));
379
380        }
381}