001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.mc; 022 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026import java.util.SortedSet; 027import java.util.TreeSet; 028import java.util.concurrent.Callable; 029import java.util.concurrent.ExecutionException; 030import java.util.concurrent.ExecutorService; 031import java.util.concurrent.Executors; 032import java.util.concurrent.Future; 033 034import org.biojava.nbio.structure.Atom; 035import org.biojava.nbio.structure.StructureException; 036import org.biojava.nbio.structure.align.CallableStructureAlignment; 037import org.biojava.nbio.structure.align.MultipleStructureAligner; 038import org.biojava.nbio.structure.align.StructureAlignment; 039import org.biojava.nbio.structure.align.ce.CeCPMain; 040import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams; 041import org.biojava.nbio.structure.align.model.AFPChain; 042import org.biojava.nbio.structure.align.multiple.Block; 043import org.biojava.nbio.structure.align.multiple.BlockImpl; 044import org.biojava.nbio.structure.align.multiple.BlockSet; 045import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 046import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 047import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 048import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; 049import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053/** 054 * Main class of the Java implementation of the Combinatorial Extension - 055 * Monte Carlo (CEMC) Algorithm, 056 * as it was originally described by C.Guda, E.D.Scheeff, P.E. Bourne and 057 * I.N. Shindyalov (2001). 058 * The original CEMC paper is available from 059 * <a href="http://psb.stanford.edu/psb-online/proceedings/psb01/guda.pdf"> 060 * here</a>. 061 * <p> 062 * This implementation is a generalized version that allows any pairwise 063 * structure alignment algorithm as input, thus supporting any non-topological 064 * or flexible alignment. The seed can also directly be the input for the 065 * optimization. For that, look at {@link MultipleMcOptimizer}. 066 * <p> 067 * A Demo on how to use the algorithm can be found in the demo package. 068 * 069 * @author Aleix Lafita 070 * @since 4.1.0 071 * 072 */ 073public class MultipleMcMain implements MultipleStructureAligner { 074 075 private static final Logger logger = 076 LoggerFactory.getLogger(MultipleMcMain.class); 077 078 /** 079 * Version history:<p> 080 * 1.0 - Initial code implementation from CEMC article.<p> 081 * 1.1 - Support CP, non-topological and flexible seed alignments.<p> 082 */ 083 public static final String version = "1.1"; 084 public static final String algorithmName = "jMultipleMC"; 085 086 private MultipleMcParameters params; 087 private MultipleAlignmentEnsemble ensemble; 088 private StructureAlignment pairwise; 089 private int reference = 0; 090 091 /** 092 * Default constructor. 093 * Default parameters are used. 094 * @param pairwiseAlgo the pairwise structure alignment used to generate the 095 * multiple alignment seed. 096 */ 097 public MultipleMcMain(StructureAlignment pairwiseAlgo){ 098 ensemble = null; 099 params = new MultipleMcParameters(); 100 pairwise = pairwiseAlgo; 101 if (pairwise == null) pairwise = new CeCPMain(); 102 } 103 104 /** 105 * Creates a MultipleAlignment seed for MC optimization from the 106 * representative Atoms of the structures. If there are N structures: 107 * <ul><li>Generate (N*(N-1))/2 all-to-all alignments in parallel using 108 * the Java API. 109 * <li>Choose the closest structure to all others as the reference. 110 * <li>Generate a MultipleAlignment by combining all the alignments to 111 * the reference, that will be used as a seed for the MC optimization. 112 * </ul> 113 * 114 * @param atomArrays List of Atoms to align of the structures 115 * @return MultipleAlignment seed alignment 116 * @throws ExecutionException 117 * @throws InterruptedException 118 * @throws StructureException 119 */ 120 private MultipleAlignment generateSeed(List<Atom[]> atomArrays) 121 throws InterruptedException, 122 ExecutionException, StructureException { 123 124 int size = atomArrays.size(); 125 126 //List to store the all-to-all alignments 127 List<List<AFPChain>> afpAlignments = new ArrayList<>(); 128 for (int i=0; i<size; i++){ 129 afpAlignments.add(new ArrayList<AFPChain>()); 130 for (int j=0; j<size; j++) 131 afpAlignments.get(i).add(null); 132 } 133 134 int threads = params.getNrThreads(); 135 ExecutorService executor = Executors.newFixedThreadPool(threads); 136 List<Future<AFPChain>> afpFuture = new ArrayList<>(); 137 138 //Create all the possible protein pairwise combinations 139 //(N*(N-1)/2) and call the pairwise alignment algorithm 140 for (int i=0; i<size; i++){ 141 for (int j=i+1; j<size; j++){ 142 143 Callable<AFPChain> worker = new CallableStructureAlignment( 144 atomArrays.get(i), atomArrays.get(j), 145 pairwise.getAlgorithmName(), pairwise.getParameters()); 146 147 Future<AFPChain> submit = executor.submit(worker); 148 afpFuture.add(submit); 149 } 150 } 151 152 //Store the resulting AFPChains in the 2D List 153 int index = 0; //the alignment index 154 for (int i=0; i<size; i++){ 155 for (int j=i; j<size; j++){ 156 if (i!=j){ 157 afpAlignments.get(i).add(j,afpFuture.get(index).get()); 158 afpAlignments.get(j).add(i,afpFuture.get(index).get()); 159 index++; 160 } 161 } 162 } 163 executor.shutdown(); 164 165 reference = chooseReferenceRMSD(afpAlignments); 166 boolean flexible = false; 167 if (pairwise.getAlgorithmName().contains("flexible")) 168 flexible = true; 169 170 return combineReferenceAlignments(afpAlignments.get(reference), 171 atomArrays, reference, flexible); 172 } 173 174 /** 175 * This method takes the all-to-all pairwise alignments Matrix (as a 176 * double List of AFPChain) and calculates the structure with the 177 * lowest average RMSD against all others. 178 * The index of this structure is returned. 179 * 180 * @param afpAlignments List double containing all-to-all pairwise alignments 181 * @return int reference index 182 */ 183 private static int chooseReferenceRMSD(List<List<AFPChain>> afpAlignments){ 184 185 int size = afpAlignments.size(); 186 187 List<Double> RMSDs = new ArrayList<>(); 188 for (int i=0; i<afpAlignments.size(); i++){ 189 double rmsd=0.0; 190 for (int j=0; j<size; j++){ 191 if (i!=j) 192 rmsd += afpAlignments.get(i).get(j).getTotalRmsdOpt(); 193 } 194 RMSDs.add(rmsd); 195 } 196 int reference = 0; 197 for (int i=1; i<size; i++){ 198 if (RMSDs.get(i) < RMSDs.get(reference)) reference = i; 199 } 200 logger.info("Reference structure is "+reference); 201 return reference; 202 } 203 204 /** 205 * This method takes a list of pairwise alignments to the reference 206 * structure and calculates a MultipleAlignment resulting from combining 207 * their residue equivalencies. 208 * <p> 209 * It uses the blocks in AFPChain as {@link Block}s in the 210 * MultipleAlignment, so considers non-topological 211 * alignments, if the alignment is rigid. If the alignment is flexible, 212 * it considers the blocks as {@link BlockSet}s. 213 * 214 * @param afpList the list of pairwise alignments to the reference 215 * @param atomArrays List of Atoms of the structures 216 * @param ref index of the reference structure 217 * @param flexible uses BlockSets if true, uses Blocks otherwise 218 * @return MultipleAlignment seed alignment 219 * @throws StructureException 220 */ 221 private static MultipleAlignment combineReferenceAlignments( 222 List<AFPChain> afpList, List<Atom[]> atomArrays, 223 int ref, boolean flexible) throws StructureException { 224 225 int size = atomArrays.size(); 226 int length = 0; //the number of residues of the reference structure 227 if (ref==0) length = afpList.get(1).getCa1Length(); 228 else length = afpList.get(0).getCa2Length(); 229 SortedSet<Integer> flexibleBoundaries = new TreeSet<>(); 230 231 //Stores the equivalencies of all the structures as a double List 232 List<List<Integer>> equivalencies = new ArrayList<>(); 233 for (int str=0; str<size; str++){ 234 equivalencies.add(new ArrayList<Integer>()); 235 for (int res=0; res<length; res++){ 236 if (str==ref) equivalencies.get(str).add(res); //identity 237 else equivalencies.get(str).add(null); 238 } 239 } 240 241 //Now we parse the AFPChains adding the residue equivalencies 242 for (int str=0; str<size; str++){ 243 if (str==ref) continue; //avoid self-comparison 244 for (int bk=0; bk<afpList.get(str).getBlockNum(); bk++){ 245 for (int i=0; i<afpList.get(str).getOptLen()[bk]; i++){ 246 int res1 = 0; //reference index 247 int res2 = 0; 248 //The low index is always in the first chain (0) 249 if(str>ref){ 250 res1 = afpList.get(str).getOptAln()[bk][0][i]; 251 res2 = afpList.get(str).getOptAln()[bk][1][i]; 252 } 253 else if (str<ref){ 254 res1 = afpList.get(str).getOptAln()[bk][1][i]; 255 res2 = afpList.get(str).getOptAln()[bk][0][i]; 256 } 257 equivalencies.get(str).set(res1,res2); 258 259 //Add the flexible boundaries if flexible 260 if(flexible && i==0) flexibleBoundaries.add(res1); 261 } 262 } 263 } 264 265 //We have translated the equivalencies, we create the MultipleAlignment 266 MultipleAlignment seed = new MultipleAlignmentImpl(); 267 seed.getEnsemble().setAtomArrays(atomArrays); 268 BlockSet blockSet = new BlockSetImpl(seed); 269 new BlockImpl(blockSet); 270 271 //Store last positions in the block different than null to detect CP 272 int[] lastResidues = new int[size]; 273 Arrays.fill(lastResidues, -1); 274 275 //We loop through all the equivalencies checking for CP 276 for (int pos=0; pos<length; pos++){ 277 //Start a new BlockSet if the position means a boundary 278 if (flexibleBoundaries.contains(pos) && 279 blockSet.getBlocks().get(blockSet.getBlocks().size()-1). 280 getAlignRes() != null){ 281 282 blockSet = new BlockSetImpl(seed); 283 new BlockImpl(blockSet); 284 } 285 286 boolean cp = false; 287 for (int str=0; str<size; str++){ 288 if (equivalencies.get(str).get(pos) == null){ 289 continue; //there is a gap, ignore position 290 } else if (equivalencies.get(str).get(pos)<lastResidues[str]){ 291 cp = true; //current residue is lower than the last 292 break; 293 } else lastResidues[str] = equivalencies.get(str).get(pos); 294 } 295 if (cp){ //if there is a CP create a new Block 296 new BlockImpl(blockSet); 297 Arrays.fill(lastResidues,-1); 298 } 299 300 //Now add the equivalent residues into the Block AlignRes variable 301 for (int str=0; str<size; str++){ 302 Block lastB = blockSet.getBlocks().get( 303 blockSet.getBlocks().size()-1); 304 305 if (lastB.getAlignRes() == null){ 306 //Initialize the aligned residues list 307 List<List<Integer>> alnRes = 308 new ArrayList<>(size); 309 310 for (int k=0; k<size; k++) { 311 alnRes.add(new ArrayList<Integer>()); 312 } 313 lastB.setAlignRes(alnRes); 314 } 315 lastB.getAlignRes().get(str).add( 316 equivalencies.get(str).get(pos)); 317 } 318 } 319 logger.info("Seed alignment has "+seed.getBlocks()+" Blocks."); 320 return seed; 321 } 322 323 @Override 324 public MultipleAlignment align(List<Atom[]> atomArrays, Object parameters) 325 throws StructureException { 326 327 MultipleAlignment result = null; 328 ensemble = new MultipleAlignmentEnsembleImpl(); 329 ensemble.setAtomArrays(atomArrays); 330 ensemble.setAlgorithmName(algorithmName); 331 ensemble.setVersion(version); 332 ensemble.setIoTime(System.currentTimeMillis()); 333 setParameters((ConfigStrucAligParams) parameters); 334 335 //Generate the seed alignment and optimize it 336 try { 337 result = generateSeed(atomArrays); 338 } catch (InterruptedException e) { 339 logger.warn("Seed generation failed.",e); 340 } catch (ExecutionException e) { 341 logger.warn("Seed generation failed.",e); 342 } 343 344 //Repeat the optimization in parallel - DISALLOWED 345 /*int threads = params.getNrThreads(); 346 ExecutorService executor = Executors.newFixedThreadPool(threads); 347 List<Future<MultipleAlignment>> msaFuture = 348 new ArrayList<Future<MultipleAlignment>>(); 349 350 for (int i=0; i<params.getNrThreads(); i++){ 351 //Change the random seed for each parallelization 352 MultipleMcParameters paramsMC = (MultipleMcParameters) params; 353 paramsMC.setRandomSeed(paramsMC.getRandomSeed()+i); 354 355 Callable<MultipleAlignment> worker = 356 new MultipleMcOptimizer( 357 result, paramsMC, reference); 358 359 Future<MultipleAlignment> submit = executor.submit(worker); 360 msaFuture.add(submit); 361 } 362 363 double maxScore = Double.NEGATIVE_INFINITY; 364 //Take the one with the best result (best MC-Score) 365 for (int i=0; i<msaFuture.size(); i++){ 366 MultipleAlignment align = msaFuture.get(i).get(); 367 double s = align.getScore(MultipleAlignmentScorer.MC_SCORE); 368 if (s > maxScore){ 369 result = align; 370 maxScore = s; 371 } 372 } 373 Long runtime = System.currentTimeMillis()-ensemble.getIoTime(); 374 ensemble.setCalculationTime(runtime); 375 376 result.setEnsemble(ensemble); 377 ensemble.addMultipleAlignment(result); 378 executor.shutdown();*/ 379 380 MultipleMcOptimizer optimizer = new MultipleMcOptimizer( 381 result, params, reference); 382 383 Long runtime = System.currentTimeMillis()-ensemble.getIoTime(); 384 ensemble.setCalculationTime(runtime); 385 386 result = optimizer.optimize(); 387 result.setEnsemble(ensemble); 388 ensemble.addMultipleAlignment(result); 389 390 return result; 391 392 } 393 394 @Override 395 public MultipleAlignment align(List<Atom[]> atomArrays) 396 throws StructureException { 397 398 if (params == null) { 399 logger.info("Using DEFAULT MultipleMc Parameters"); 400 params = new MultipleMcParameters(); 401 } 402 return align(atomArrays,params); 403 } 404 405 @Override 406 public ConfigStrucAligParams getParameters() { 407 return params; 408 } 409 410 @Override 411 public void setParameters(ConfigStrucAligParams parameters) { 412 if (!(parameters instanceof MultipleMcParameters)){ 413 throw new IllegalArgumentException( 414 "Provided parameter object is not of type MultipleMC"); 415 } 416 this.params = (MultipleMcParameters) parameters; 417 } 418 419 @Override 420 public String getAlgorithmName() { 421 return algorithmName; 422 } 423 424 @Override 425 public String getVersion() { 426 return version; 427 } 428}