001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.quaternary; 022 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.HashMap; 026import java.util.List; 027import java.util.Map; 028import java.util.Map.Entry; 029 030import javax.vecmath.Matrix4d; 031 032import org.biojava.nbio.structure.Atom; 033import org.biojava.nbio.structure.Calc; 034import org.biojava.nbio.structure.Structure; 035import org.biojava.nbio.structure.StructureException; 036import org.biojava.nbio.structure.StructureTools; 037import org.biojava.nbio.structure.align.multiple.Block; 038import org.biojava.nbio.structure.align.multiple.BlockImpl; 039import org.biojava.nbio.structure.align.multiple.BlockSet; 040import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 041import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 042import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; 043import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 044import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 045import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer; 046import org.biojava.nbio.structure.cluster.Subunit; 047import org.biojava.nbio.structure.cluster.SubunitCluster; 048import org.biojava.nbio.structure.cluster.SubunitClusterer; 049import org.biojava.nbio.structure.cluster.SubunitClustererParameters; 050import org.biojava.nbio.structure.cluster.SubunitExtractor; 051import org.biojava.nbio.structure.contact.Pair; 052import org.biojava.nbio.structure.geometry.SuperPositions; 053import org.biojava.nbio.structure.geometry.UnitQuaternions; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057/** 058 * Quaternary Structure Alignment (QS-Align). The algorithm takes as input two 059 * protein structures at the quaternary structure level (multiple chains or 060 * subunits) and calculates the equivalent subunit matching and a residue-based 061 * alignment, together with usual alignment quality scores. 062 * 063 * @author Aleix Lafita 064 * @since 5.0.0 065 * 066 */ 067public class QsAlign { 068 069 private static final Logger logger = LoggerFactory.getLogger(QsAlign.class); 070 071 public static QsAlignResult align(Structure s1, Structure s2, 072 SubunitClustererParameters cParams, QsAlignParameters aParams) 073 throws StructureException { 074 return align( 075 SubunitExtractor.extractSubunits(s1, 076 cParams.getAbsoluteMinimumSequenceLength(), 077 cParams.getMinimumSequenceLengthFraction(), 078 cParams.getMinimumSequenceLength()), 079 SubunitExtractor.extractSubunits(s2, 080 cParams.getAbsoluteMinimumSequenceLength(), 081 cParams.getMinimumSequenceLengthFraction(), 082 cParams.getMinimumSequenceLength()), cParams, aParams); 083 } 084 085 public static QsAlignResult align(List<Subunit> s1, List<Subunit> s2, 086 SubunitClustererParameters cParams, QsAlignParameters aParams) 087 throws StructureException { 088 089 QsAlignResult result = new QsAlignResult(s1, s2); 090 091 // SETP 1: cluster each group of subunits O(N^2*L^2) - intra 092 093 List<SubunitCluster> c1 = SubunitClusterer.cluster(s1, cParams).getClusters(); 094 List<SubunitCluster> c2 = SubunitClusterer.cluster(s2, cParams).getClusters(); 095 096 // STEP 2: match each subunit cluster between groups O(N^2*L^2) - inter 097 Map<Integer, Integer> clusterMap = new HashMap<>(); 098 for (int i = 0; i < c1.size(); i++) { 099 for (int j = 0; j < c2.size(); j++) { 100 101 if (clusterMap.containsKey(i)) 102 break; 103 if (clusterMap.containsValue(j)) 104 continue; 105 106 // Use structural alignment to match the subunit clusters 107 if (c1.get(i).mergeStructure(c2.get(j),cParams)) { 108 clusterMap.put(i, j); 109 } 110 } 111 } 112 113 logger.info("Cluster Map: " + clusterMap.toString()); 114 result.setClusters(c1); 115 116 // STEP 3: Align the assemblies for each cluster match O(N^2*L) 117 for (int globalKey : clusterMap.keySet()) { 118 119 // Obtain the clusters 120 SubunitCluster clust1 = c1.get(globalKey); 121 SubunitCluster clust2 = c2.get(clusterMap.get(globalKey)); 122 123 // Take a cluster match as reference 124 int index1 = 0; 125 int index2 = clust1.size() - clust2.size(); 126 Map<Integer, Integer> subunitMap = new HashMap<>(); 127 subunitMap.put(index1, index2); 128 129 // Map cluster id to their subunit matching 130 Map<Integer, Map<Integer, Integer>> clustSubunitMap = new HashMap<>(); 131 clustSubunitMap.put(globalKey, subunitMap); 132 133 // Change order of key set so that globalKey is first 134 List<Integer> keySet = new ArrayList<>(clusterMap.keySet()); 135 keySet.remove((Integer) globalKey); 136 keySet.add(0, globalKey); 137 138 for (int key : clusterMap.keySet()) { 139 140 // Recover subunitMap if it is the reference, new one otherwise 141 if (key == globalKey) 142 subunitMap = clustSubunitMap.get(key); 143 else 144 subunitMap = new HashMap<>(); 145 146 // Obtain the clusters of each subunit group 147 clust1 = c1.get(key); 148 clust2 = c2.get(clusterMap.get(key)); 149 150 // Get the initial subunit indices of each group 151 index1 = 0; 152 index2 = clust1.size() - clust2.size(); 153 154 for (int i = 0; i < index2; i++) { 155 for (int j = index2; j < clust1.size(); j++) { 156 157 if (subunitMap.containsKey(i)) 158 break; 159 if (subunitMap.containsValue(j)) 160 continue; 161 162 // Obtain cumulative transformation matrix 163 Matrix4d transform = getTransformForClusterSubunitMap( 164 c1, clustSubunitMap); 165 166 // Obtain Atom arrays of the subunit pair to match 167 Atom[] atoms1 = clust1.getAlignedAtomsSubunit(i); 168 Atom[] atoms2 = clust1.getAlignedAtomsSubunit(j); 169 170 // Obtain centroids and transform the second 171 Atom centr1 = Calc.getCentroid(atoms1); 172 Atom centr2 = Calc.getCentroid(atoms2); 173 Calc.transform(centr2, transform); 174 175 // 1- Check that the distance fulfills maximum 176 double dCentroid = Calc.getDistance(centr1, centr2); 177 if (dCentroid > aParams.getdCutoff()) { 178 logger.debug(String.format("Subunit matching %d " 179 + "vs %d of cluster %d could not be " 180 + "matched, because centroid distance is " 181 + "%.2f", index1, index2, key, dCentroid)); 182 continue; 183 } 184 185 // Transform coordinates of second 186 Atom[] atoms2c = StructureTools.cloneAtomArray(atoms2); 187 Calc.transform(atoms2c, transform); 188 189 // 2- Check the orientation metric condition 190 double qOrient = UnitQuaternions.orientationAngle( 191 Calc.atomsToPoints(atoms1), 192 Calc.atomsToPoints(atoms2c), false); 193 qOrient = Math.min(Math.abs(2*Math.PI - qOrient), qOrient); 194 if (qOrient > aParams.getMaxOrientationAngle()) { 195 logger.debug(String.format("Subunit matching %d " 196 + "vs %d of cluster %d could not be " 197 + "matched, because orientation metric is " 198 + "%.2f", i, j, key, qOrient)); 199 continue; 200 } 201 202 // 3- Check the RMSD condition 203 double rmsd = Calc.rmsd(atoms1, atoms2c); 204 if (rmsd > aParams.getMaxRmsd()) { 205 logger.debug(String.format("Subunit matching %d " 206 + "vs %d of cluster %d could not be " 207 + "matched, because RMSD is %.2f", i, 208 j, key, rmsd)); 209 continue; 210 } 211 212 logger.info(String.format("Subunit matching %d vs %d" 213 + " of cluster %d with centroid distance %.2f" 214 + ", orientation metric %.2f and RMSD %.2f", 215 i, j, key, dCentroid, qOrient, rmsd)); 216 217 subunitMap.put(i, j); 218 } 219 } 220 221 clustSubunitMap.put(key, subunitMap); 222 } 223 224 logger.info("Cluster Subunit Map: " + clustSubunitMap.toString()); 225 226 // Unfold the nested map into subunit map and alignment 227 subunitMap = new HashMap<>(); 228 List<Integer> alignRes1 = new ArrayList<>(); 229 List<Integer> alignRes2 = new ArrayList<>(); 230 List<Atom> atomArray1 = new ArrayList<>(); 231 List<Atom> atomArray2 = new ArrayList<>(); 232 233 for (int key : clustSubunitMap.keySet()) { 234 235 // Obtain the cluster and the alignment in it 236 SubunitCluster cluster = c1.get(key); 237 List<List<Integer>> clusterEqrs = cluster 238 .getMultipleAlignment().getBlock(0).getAlignRes(); 239 240 for (Entry<Integer, Integer> pair : clustSubunitMap.get(key) 241 .entrySet()) { 242 243 int i = pair.getKey(); 244 int j = pair.getValue(); 245 246 // Obtain the indices of the original Subunit Lists 247 int orig1 = s1.indexOf(cluster.getSubunits().get(i)); 248 int orig2 = s2.indexOf(cluster.getSubunits().get(j)); 249 250 // Append rescaled aligned residue indices 251 for (Integer eqr : clusterEqrs.get(i)) 252 alignRes1.add(eqr + atomArray1.size()); 253 for (Integer eqr : clusterEqrs.get(j)) 254 alignRes2.add(eqr + atomArray2.size()); 255 256 // Apend atoms to the arrays 257 atomArray1.addAll(Arrays.asList(s1.get(orig1) 258 .getRepresentativeAtoms())); 259 atomArray2.addAll(Arrays.asList(s2.get(orig2) 260 .getRepresentativeAtoms())); 261 262 subunitMap.put(orig1, orig2); 263 } 264 } 265 266 // Evaluate the goodness of the match with an alignment object 267 MultipleAlignment msa = new MultipleAlignmentImpl(); 268 msa.setEnsemble(new MultipleAlignmentEnsembleImpl()); 269 msa.getEnsemble().setAtomArrays( 270 Arrays.asList(new Atom[][] { 271 atomArray1.toArray(new Atom[atomArray1.size()]), 272 atomArray2.toArray(new Atom[atomArray2.size()]) })); 273 274 // Fill in the alignment information 275 BlockSet bs = new BlockSetImpl(msa); 276 Block b = new BlockImpl(bs); 277 List<List<Integer>> alignRes = new ArrayList<>(2); 278 alignRes.add(alignRes1); 279 alignRes.add(alignRes2); 280 b.setAlignRes(alignRes); 281 282 // Fill in the transformation matrices 283 new ReferenceSuperimposer().superimpose(msa); 284 285 // Calculate some scores 286 MultipleAlignmentScorer.calculateScores(msa); 287 288 // If it is the best match found so far store it 289 if (subunitMap.size() > result.getSubunitMap().size()) { 290 result.setSubunitMap(subunitMap); 291 result.setAlignment(msa); 292 logger.info("Better result found: " + result.toString()); 293 } else if (subunitMap.size() == result.getSubunitMap().size()) { 294 if (result.getAlignment() == null) { 295 result.setSubunitMap(subunitMap); 296 result.setAlignment(msa); 297 } else if (msa.getScore(MultipleAlignmentScorer.RMSD) < result.getRmsd()) { 298 result.setSubunitMap(subunitMap); 299 result.setAlignment(msa); 300 logger.info("Better result found: " + result.toString()); 301 } 302 } 303 304 } 305 306 return result; 307 } 308 309 /** 310 * Returns a pair of Atom arrays corresponding to the alignment of subunit 311 * matchings, in order of appearance. Superposition of the two Atom sets 312 * gives the transformation of the complex. 313 * <p> 314 * Utility method to cumulative calculate the alignment Atoms. 315 * 316 * @param clusters 317 * List of SubunitClusters 318 * @param clusterSubunitMap 319 * map from cluster id to subunit matching 320 * @return pair of atom arrays to be superposed 321 */ 322 private static Pair<Atom[]> getAlignedAtomsForClusterSubunitMap( 323 List<SubunitCluster> clusters, 324 Map<Integer, Map<Integer, Integer>> clusterSubunitMap) { 325 326 List<Atom> atomArray1 = new ArrayList<>(); 327 List<Atom> atomArray2 = new ArrayList<>(); 328 329 // For each cluster of subunits 330 for (int key : clusterSubunitMap.keySet()) { 331 332 // Obtain the cluster and the alignment in it 333 SubunitCluster cluster = clusters.get(key); 334 335 // For each subunit matching in the cluster 336 for (Entry<Integer, Integer> pair : clusterSubunitMap.get(key) 337 .entrySet()) { 338 339 int i = pair.getKey(); 340 int j = pair.getValue(); 341 342 // Apend atoms to the arrays 343 atomArray1.addAll(Arrays.asList(cluster 344 .getAlignedAtomsSubunit(i))); 345 atomArray2.addAll(Arrays.asList(cluster 346 .getAlignedAtomsSubunit(j))); 347 } 348 349 } 350 return new Pair<>( 351 atomArray1.toArray(new Atom[0]), 352 atomArray2.toArray(new Atom[0])); 353 } 354 355 /** 356 * Returns the transformation matrix corresponding to the alignment of 357 * subunit matchings. 358 * <p> 359 * Utility method to cumulative calculate the alignment transformation. 360 * 361 * @param clusters 362 * List of SubunitClusters 363 * @param clusterSubunitMap 364 * map from cluster id to subunit matching 365 * @return transformation matrix 366 */ 367 private static Matrix4d getTransformForClusterSubunitMap( 368 List<SubunitCluster> clusters, 369 Map<Integer, Map<Integer, Integer>> clusterSubunitMap) { 370 371 Pair<Atom[]> pair = getAlignedAtomsForClusterSubunitMap(clusters, 372 clusterSubunitMap); 373 374 return SuperPositions.superpose(Calc.atomsToPoints(pair.getFirst()), 375 Calc.atomsToPoints(pair.getSecond())); 376 377 } 378}