001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.util; 022 023import org.biojava.nbio.structure.*; 024import org.biojava.nbio.structure.align.AFPTwister; 025import org.biojava.nbio.structure.align.ce.CECalculator; 026import org.biojava.nbio.structure.align.fatcat.FatCatFlexible; 027import org.biojava.nbio.structure.align.fatcat.FatCatRigid; 028import org.biojava.nbio.structure.align.model.AFPChain; 029import org.biojava.nbio.structure.align.xml.AFPChainXMLParser; 030import org.biojava.nbio.structure.geometry.Matrices; 031import org.biojava.nbio.structure.geometry.SuperPositions; 032import org.biojava.nbio.structure.jama.Matrix; 033import org.slf4j.Logger; 034import org.slf4j.LoggerFactory; 035 036import java.io.IOException; 037import java.io.Writer; 038import java.util.*; 039import java.util.Map.Entry; 040import java.util.regex.Matcher; 041import java.util.regex.Pattern; 042 043import javax.vecmath.Matrix4d; 044 045/** 046 * Methods for analyzing and manipulating AFPChains and for 047 * other pairwise alignment utilities. <p> 048 * Current methods: replace optimal alignment, create new AFPChain, 049 * format conversion, update superposition, etc. 050 * 051 * @author Spencer Bliven 052 * @author Aleix Lafita 053 * 054 */ 055public class AlignmentTools { 056 057 private static final Logger logger = LoggerFactory.getLogger(AlignmentTools.class); 058 059 060 public static boolean debug = false; 061 062 /** 063 * Checks that the alignment given by afpChain is sequential. This means 064 * that the residue indices of both proteins increase monotonically as 065 * a function of the alignment position (ie both proteins are sorted). 066 * 067 * This will return false for circularly permuted alignments or other 068 * non-topological alignments. It will also return false for cases where 069 * the alignment itself is sequential but it is not stored in the afpChain 070 * in a sorted manner. 071 * 072 * Since algorithms which create non-sequential alignments split the 073 * alignment into multiple blocks, some computational time can be saved 074 * by only checking block boundaries for sequentiality. Setting 075 * <tt>checkWithinBlocks</tt> to <tt>true</tt> makes this function slower, 076 * but detects AFPChains with non-sequential blocks. 077 * 078 * Note that this method should give the same results as 079 * {@link AFPChain#isSequentialAlignment()}. However, the AFPChain version 080 * relies on the StructureAlignment algorithm correctly setting this 081 * parameter, which is sadly not always the case. 082 * 083 * @param afpChain An alignment 084 * @param checkWithinBlocks Indicates whether individual blocks should be 085 * checked for sequentiality 086 * @return True if the alignment is sequential. 087 */ 088 public static boolean isSequentialAlignment(AFPChain afpChain, boolean checkWithinBlocks) { 089 int[][][] optAln = afpChain.getOptAln(); 090 int[] alnLen = afpChain.getOptLen(); 091 int blocks = afpChain.getBlockNum(); 092 093 if(blocks < 1) return true; //trivial case 094 if ( alnLen[0] < 1) return true; 095 096 // Check that blocks are sequential 097 if(checkWithinBlocks) { 098 for(int block = 0; block<blocks; block++) { 099 if(alnLen[block] < 1 ) continue; //skip empty blocks 100 101 int prevRes1 = optAln[block][0][0]; 102 int prevRes2 = optAln[block][1][0]; 103 104 for(int pos = 1; pos<alnLen[block]; pos++) { 105 int currRes1 = optAln[block][0][pos]; 106 int currRes2 = optAln[block][1][pos]; 107 108 if(currRes1 < prevRes1) { 109 return false; 110 } 111 if(currRes2 < prevRes2) { 112 return false; 113 } 114 115 prevRes1 = currRes1; 116 prevRes2 = currRes2; 117 } 118 } 119 } 120 121 // Check that blocks are sequential 122 int prevRes1 = optAln[0][0][alnLen[0]-1]; 123 int prevRes2 = optAln[0][1][alnLen[0]-1]; 124 125 for(int block = 1; block<blocks;block++) { 126 if(alnLen[block] < 1 ) continue; //skip empty blocks 127 128 if(optAln[block][0][0]<prevRes1) { 129 return false; 130 } 131 if(optAln[block][1][0]<prevRes2) { 132 return false; 133 } 134 135 prevRes1 = optAln[block][0][alnLen[block]-1]; 136 prevRes2 = optAln[block][1][alnLen[block]-1]; 137 } 138 139 return true; 140 } 141 142 /** 143 * Creates a Map specifying the alignment as a mapping between residue indices 144 * of protein 1 and residue indices of protein 2. 145 * 146 * <p>For example,<pre> 147 * 1234 148 * 5678</pre> 149 * becomes<pre> 150 * 1->5 151 * 2->6 152 * 3->7 153 * 4->8</pre> 154 * 155 * @param afpChain An alignment 156 * @return A mapping from aligned residues of protein 1 to their partners in protein 2. 157 * @throws StructureException If afpChain is not one-to-one 158 */ 159 public static Map<Integer, Integer> alignmentAsMap(AFPChain afpChain) throws StructureException { 160 Map<Integer,Integer> map = new HashMap<Integer,Integer>(); 161 162 if( afpChain.getAlnLength() < 1 ) { 163 return map; 164 } 165 int[][][] optAln = afpChain.getOptAln(); 166 int[] optLen = afpChain.getOptLen(); 167 for(int block = 0; block < afpChain.getBlockNum(); block++) { 168 for(int pos = 0; pos < optLen[block]; pos++) { 169 int res1 = optAln[block][0][pos]; 170 int res2 = optAln[block][1][pos]; 171 if(map.containsKey(res1)) { 172 throw new StructureException(String.format("Residue %d aligned to both %d and %d.", res1,map.get(res1),res2)); 173 } 174 map.put(res1,res2); 175 } 176 } 177 return map; 178 } 179 180 /** 181 * Applies an alignment k times. Eg if alignmentMap defines function f(x), 182 * this returns a function f^k(x)=f(f(...f(x)...)). 183 * 184 * @param <T> 185 * @param alignmentMap The input function, as a map (see {@link AlignmentTools#alignmentAsMap(AFPChain)}) 186 * @param k The number of times to apply the alignment 187 * @return A new alignment. If the input function is not automorphic 188 * (one-to-one), then some inputs may map to null, indicating that the 189 * function is undefined for that input. 190 */ 191 public static <T> Map<T,T> applyAlignment(Map<T, T> alignmentMap, int k) { 192 return applyAlignment(alignmentMap, new IdentityMap<T>(), k); 193 } 194 195 /** 196 * Applies an alignment k times. Eg if alignmentMap defines function f(x), 197 * this returns a function f^k(x)=f(f(...f(x)...)). 198 * 199 * To allow for functions with different domains and codomains, the identity 200 * function allows converting back in a reasonable way. For instance, if 201 * alignmentMap represented an alignment between two proteins with different 202 * numbering schemes, the identity function could calculate the offset 203 * between residue numbers, eg I(x) = x-offset. 204 * 205 * When an identity function is provided, the returned function calculates 206 * f^k(x) = f(I( f(I( ... f(x) ... )) )). 207 * 208 * @param <S> 209 * @param <T> 210 * @param alignmentMap The input function, as a map (see {@link AlignmentTools#alignmentAsMap(AFPChain)}) 211 * @param identity An identity-like function providing the isomorphism between 212 * the codomain of alignmentMap (of type <T>) and the domain (type <S>). 213 * @param k The number of times to apply the alignment 214 * @return A new alignment. If the input function is not automorphic 215 * (one-to-one), then some inputs may map to null, indicating that the 216 * function is undefined for that input. 217 */ 218 public static <S,T> Map<S,T> applyAlignment(Map<S, T> alignmentMap, Map<T,S> identity, int k) { 219 220 // This implementation simply applies the map k times. 221 // If k were large, it would be more efficient to do this recursively, 222 // (eg f^4 = (f^2)^2) but k will usually be small. 223 224 if(k<0) throw new IllegalArgumentException("k must be positive"); 225 if(k==1) { 226 return new HashMap<S,T>(alignmentMap); 227 } 228 // Convert to lists to establish a fixed order 229 List<S> preimage = new ArrayList<S>(alignmentMap.keySet()); // currently unmodified 230 List<S> image = new ArrayList<S>(preimage); 231 232 for(int n=1;n<k;n++) { 233 // apply alignment 234 for(int i=0;i<image.size();i++) { 235 S pre = image.get(i); 236 T intermediate = (pre==null?null: alignmentMap.get(pre)); 237 S post = (intermediate==null?null: identity.get(intermediate)); 238 image.set(i, post); 239 } 240 } 241 242 243 244 Map<S, T> imageMap = new HashMap<S,T>(alignmentMap.size()); 245 246 //TODO handle nulls consistently. 247 // assure that all the residues in the domain are valid keys 248 /* 249 for(int i=0;i<preimage.size();i++) { 250 S pre = preimage.get(i); 251 T intermediate = (pre==null?null: alignmentMap.get(pre)); 252 S post = (intermediate==null?null: identity.get(intermediate)); 253 imageMap.put(post, null); 254 } 255 */ 256 // now populate with actual values 257 for(int i=0;i<preimage.size();i++) { 258 S pre = preimage.get(i); 259 260 // image is currently f^k-1(x), so take the final step 261 S preK1 = image.get(i); 262 T postK = (preK1==null?null: alignmentMap.get(preK1)); 263 imageMap.put(pre,postK); 264 265 } 266 return imageMap; 267 } 268 269 /** 270 * Helper for {@link #getSymmetryOrder(Map, Map, int, float)} with a true 271 * identity function (X->X). 272 * 273 * <p>This method should only be used in cases where the two proteins 274 * aligned have identical numbering, as for self-alignments. See 275 * {@link #getSymmetryOrder(AFPChain, int, float)} for a way to guess 276 * the sequential correspondence between two proteins. 277 * 278 * @param alignment 279 * @param maxSymmetry 280 * @param minimumMetricChange 281 * @return 282 */ 283 public static int getSymmetryOrder(Map<Integer, Integer> alignment, 284 final int maxSymmetry, final float minimumMetricChange) { 285 return getSymmetryOrder(alignment, new IdentityMap<Integer>(), maxSymmetry, minimumMetricChange); 286 } 287 /** 288 * Tries to detect symmetry in an alignment. 289 * 290 * <p>Conceptually, an alignment is a function f:A->B between two sets of 291 * integers. The function may have simple topology (meaning that if two 292 * elements of A are close, then their images in B will also be close), or 293 * may have more complex topology (such as a circular permutation). This 294 * function checks <i>alignment</i> against a reference function 295 * <i>identity</i>, which should have simple topology. It then tries to 296 * determine the symmetry order of <i>alignment</i> relative to 297 * <i>identity</i>, up to a maximum order of <i>maxSymmetry</i>. 298 * 299 * 300 * <p><strong>Details</strong><br/> 301 * Considers the offset (in number of residues) which a residue moves 302 * after undergoing <i>n</i> alternating transforms by alignment and 303 * identity. If <i>n</i> corresponds to the intrinsic order of the alignment, 304 * this will be small. This algorithm tries increasing values of <i>n</i> 305 * and looks for abrupt decreases in the root mean squared offset. 306 * If none are found at <i>n</i><=maxSymmetry, the alignment is reported as 307 * non-symmetric. 308 * 309 * @param alignment The alignment to test for symmetry 310 * @param identity An alignment with simple topology which approximates 311 * the sequential relationship between the two proteins. Should map in the 312 * reverse direction from alignment. 313 * @param maxSymmetry Maximum symmetry to consider. High values increase 314 * the calculation time and can lead to overfitting. 315 * @param minimumMetricChange Percent decrease in root mean squared offsets 316 * in order to declare symmetry. 0.4f seems to work well for CeSymm. 317 * @return The order of symmetry of alignment, or 1 if no order <= 318 * maxSymmetry is found. 319 * 320 * @see IdentityMap For a simple identity function 321 */ 322 public static int getSymmetryOrder(Map<Integer, Integer> alignment, Map<Integer,Integer> identity, 323 final int maxSymmetry, final float minimumMetricChange) { 324 List<Integer> preimage = new ArrayList<Integer>(alignment.keySet()); // currently unmodified 325 List<Integer> image = new ArrayList<Integer>(preimage); 326 327 int bestSymmetry = 1; 328 double bestMetric = Double.POSITIVE_INFINITY; //lower is better 329 boolean foundSymmetry = false; 330 331 if(debug) { 332 logger.trace("Symm\tPos\tDelta"); 333 } 334 335 for(int n=1;n<=maxSymmetry;n++) { 336 int deltasSq = 0; 337 int numDeltas = 0; 338 // apply alignment 339 for(int i=0;i<image.size();i++) { 340 Integer pre = image.get(i); 341 Integer intermediate = (pre==null?null: alignment.get(pre)); 342 Integer post = (intermediate==null?null: identity.get(intermediate)); 343 image.set(i, post); 344 345 if(post != null) { 346 int delta = post-preimage.get(i); 347 348 deltasSq += delta*delta; 349 numDeltas++; 350 351 if(debug) { 352 logger.debug("%d\t%d\t%d\n",n,preimage.get(i),delta); 353 } 354 } 355 356 } 357 358 // Metrics: RMS compensates for the trend of smaller numDeltas with higher order 359 // Not normalizing by numDeltas favors smaller orders 360 361 double metric = Math.sqrt((double)deltasSq/numDeltas); // root mean squared distance 362 363 if(!foundSymmetry && metric < bestMetric * minimumMetricChange) { 364 // n = 1 is never the best symmetry 365 if(bestMetric < Double.POSITIVE_INFINITY) { 366 foundSymmetry = true; 367 } 368 bestSymmetry = n; 369 bestMetric = metric; 370 } 371 372 // When debugging need to loop over everything. Unneeded in production 373 if(!debug && foundSymmetry) { 374 break; 375 } 376 377 } 378 if(foundSymmetry) { 379 return bestSymmetry; 380 } else { 381 return 1; 382 } 383 } 384 385 386 /** 387 * Guesses the order of symmetry in an alignment 388 * 389 * <p>Uses {@link #getSymmetryOrder(Map alignment, Map identity, int, float)} 390 * to determine the the symmetry order. For the identity alignment, sorts 391 * the aligned residues of each protein sequentially, then defines the ith 392 * residues of each protein to be equivalent. 393 * 394 * <p>Note that the selection of the identity alignment here is <i>very</i> 395 * naive, and only works for proteins with very good coverage. Wherever 396 * possible, it is better to construct an identity function explicitly 397 * from a sequence alignment (or use an {@link IdentityMap} for internally 398 * symmetric proteins) and use {@link #getSymmetryOrder(Map, Map, int, float)}. 399 */ 400 public static int getSymmetryOrder(AFPChain afpChain, int maxSymmetry, float minimumMetricChange) throws StructureException { 401 // alignment comes from the afpChain alignment 402 Map<Integer,Integer> alignment = AlignmentTools.alignmentAsMap(afpChain); 403 404 // Now construct identity to map aligned residues in sequential order 405 Map<Integer, Integer> identity = guessSequentialAlignment(alignment, true); 406 407 408 return AlignmentTools.getSymmetryOrder(alignment, 409 identity, 410 maxSymmetry, minimumMetricChange); 411 } 412 413 /** 414 * Takes a potentially non-sequential alignment and guesses a sequential 415 * version of it. Residues from each structure are sorted sequentially and 416 * then compared directly. 417 * 418 * <p>The results of this method are consistent with what one might expect 419 * from an identity function, and are therefore useful with 420 * {@link #getSymmetryOrder(Map, Map identity, int, float)}. 421 * <ul> 422 * <li>Perfect self-alignments will have the same pre-image and image, 423 * so will map X->X</li> 424 * <li>Gaps and alignment errors will cause errors in the resulting map, 425 * but only locally. Errors do not propagate through the whole 426 * alignment.</li> 427 * </ul> 428 * 429 * <h4>Example:</h4> 430 * A non sequential alignment, represented schematically as 431 * <pre> 432 * 12456789 433 * 78912345</pre> 434 * would result in a map 435 * <pre> 436 * 12456789 437 * 12345789</pre> 438 * @param alignment The non-sequential input alignment 439 * @param inverseAlignment If false, map from structure1 to structure2. If 440 * true, generate the inverse of that map. 441 * @return A mapping from sequential residues of one protein to those of the other 442 * @throws IllegalArgumentException if the input alignment is not one-to-one. 443 */ 444 public static Map<Integer, Integer> guessSequentialAlignment( 445 Map<Integer,Integer> alignment, boolean inverseAlignment) { 446 Map<Integer,Integer> identity = new HashMap<Integer,Integer>(); 447 448 SortedSet<Integer> aligned1 = new TreeSet<Integer>(); 449 SortedSet<Integer> aligned2 = new TreeSet<Integer>(); 450 451 for(Entry<Integer,Integer> pair : alignment.entrySet()) { 452 aligned1.add(pair.getKey()); 453 if( !aligned2.add(pair.getValue()) ) 454 throw new IllegalArgumentException("Alignment is not one-to-one for residue "+pair.getValue()+" of the second structure."); 455 } 456 457 Iterator<Integer> it1 = aligned1.iterator(); 458 Iterator<Integer> it2 = aligned2.iterator(); 459 while(it1.hasNext()) { 460 if(inverseAlignment) { // 2->1 461 identity.put(it2.next(),it1.next()); 462 } else { // 1->2 463 identity.put(it1.next(),it2.next()); 464 } 465 } 466 return identity; 467 } 468 469 /** 470 * Retrieves the optimum alignment from an AFPChain and returns it as a 471 * java collection. The result is indexed in the same way as 472 * {@link AFPChain#getOptAln()}, but has the correct size(). 473 * <pre> 474 * List<List<List<Integer>>> aln = getOptAlnAsList(AFPChain afpChain); 475 * aln.get(blockNum).get(structureNum={0,1}).get(pos)</pre> 476 * 477 * @param afpChain 478 * @return 479 */ 480 public static List<List<List<Integer>>> getOptAlnAsList(AFPChain afpChain) { 481 int[][][] optAln = afpChain.getOptAln(); 482 int[] optLen = afpChain.getOptLen(); 483 List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum()); 484 for(int blockNum=0;blockNum<afpChain.getBlockNum();blockNum++) { 485 //TODO could improve speed an memory by wrapping the arrays with 486 // an unmodifiable list, similar to Arrays.asList(...) but with the 487 // correct size parameter. 488 List<Integer> align1 = new ArrayList<Integer>(optLen[blockNum]); 489 List<Integer> align2 = new ArrayList<Integer>(optLen[blockNum]); 490 for(int pos=0;pos<optLen[blockNum];pos++) { 491 align1.add(optAln[blockNum][0][pos]); 492 align2.add(optAln[blockNum][1][pos]); 493 } 494 List<List<Integer>> block = new ArrayList<List<Integer>>(2); 495 block.add(align1); 496 block.add(align2); 497 blocks.add(block); 498 } 499 500 return blocks; 501 } 502 503 504 505 /** 506 * A Map<K,V> can be viewed as a function from K to V. This class represents 507 * the identity function. Getting a value results in the value itself. 508 * 509 * <p>The class is a bit inconsistent when representing its contents. On 510 * the one hand, containsKey(key) is true for all objects. However, 511 * attempting to iterate through the values returns an empty set. 512 * 513 * @author Spencer Bliven 514 * 515 * @param <K> 516 */ 517 public static class IdentityMap<K> extends AbstractMap<K,K> { 518 public IdentityMap() {} 519 520 /** 521 * @param key 522 * @return the key 523 * @throws ClassCastException if key is not of type K 524 */ 525 @SuppressWarnings("unchecked") 526 @Override 527 public K get(Object key) { 528 return (K)key; 529 } 530 531 /** 532 * Always returns the empty set 533 */ 534 @Override 535 public Set<java.util.Map.Entry<K, K>> entrySet() { 536 return Collections.emptySet(); 537 } 538 539 @Override 540 public boolean containsKey(Object key) { 541 return true; 542 } 543 } 544 545 /** 546 * Fundamentally, an alignment is just a list of aligned residues in each 547 * protein. This method converts two lists of ResidueNumbers into an 548 * AFPChain. 549 * 550 * <p>Parameters are filled with defaults (often null) or sometimes 551 * calculated. 552 * 553 * <p>For a way to modify the alignment of an existing AFPChain, see 554 * {@link AlignmentTools#replaceOptAln(AFPChain, Atom[], Atom[], Map)} 555 * @param ca1 CA atoms of the first protein 556 * @param ca2 CA atoms of the second protein 557 * @param aligned1 A list of aligned residues from the first protein 558 * @param aligned2 A list of aligned residues from the second protein. 559 * Must be the same length as aligned1. 560 * @return An AFPChain representing the alignment. Many properties may be 561 * null or another default. 562 * @throws StructureException if an error occured during superposition 563 * @throws IllegalArgumentException if aligned1 and aligned2 have different 564 * lengths 565 * @see AlignmentTools#replaceOptAln(AFPChain, Atom[], Atom[], Map) 566 */ 567 public static AFPChain createAFPChain(Atom[] ca1, Atom[] ca2, 568 ResidueNumber[] aligned1, ResidueNumber[] aligned2 ) throws StructureException { 569 //input validation 570 int alnLen = aligned1.length; 571 if(alnLen != aligned2.length) { 572 throw new IllegalArgumentException("Alignment lengths are not equal"); 573 } 574 575 AFPChain a = new AFPChain(AFPChain.UNKNOWN_ALGORITHM); 576 try { 577 a.setName1(ca1[0].getGroup().getChain().getStructure().getName()); 578 if(ca2[0].getGroup().getChain().getStructure() != null) { 579 // common case for cloned ca2 580 a.setName2(ca2[0].getGroup().getChain().getStructure().getName()); 581 } 582 } catch(Exception e) { 583 // One of the structures wasn't fully created. Ignore 584 } 585 a.setBlockNum(1); 586 a.setCa1Length(ca1.length); 587 a.setCa2Length(ca2.length); 588 589 a.setOptLength(alnLen); 590 a.setOptLen(new int[] {alnLen}); 591 592 593 Matrix[] ms = new Matrix[a.getBlockNum()]; 594 a.setBlockRotationMatrix(ms); 595 Atom[] blockShiftVector = new Atom[a.getBlockNum()]; 596 a.setBlockShiftVector(blockShiftVector); 597 598 String[][][] pdbAln = new String[1][2][alnLen]; 599 for(int i=0;i<alnLen;i++) { 600 pdbAln[0][0][i] = aligned1[i].getChainName()+":"+aligned1[i]; 601 pdbAln[0][1][i] = aligned2[i].getChainName()+":"+aligned2[i]; 602 } 603 604 a.setPdbAln(pdbAln); 605 606 // convert pdbAln to optAln, and fill in some other basic parameters 607 AFPChainXMLParser.rebuildAFPChain(a, ca1, ca2); 608 609 return a; 610 611 // Currently a single block. Split into several blocks by sequence if needed 612 // return AlignmentTools.splitBlocksByTopology(a,ca1,ca2); 613 } 614 615 /** 616 * 617 * @param a 618 * @param ca1 619 * @param ca2 620 * @return 621 * @throws StructureException if an error occurred during superposition 622 */ 623 public static AFPChain splitBlocksByTopology(AFPChain a, Atom[] ca1, Atom[] ca2) throws StructureException { 624 int[][][] optAln = a.getOptAln(); 625 int blockNum = a.getBlockNum(); 626 int[] optLen = a.getOptLen(); 627 628 // Determine block lengths 629 // Split blocks if residue indices don't increase monotonically 630 List<Integer> newBlkLen = new ArrayList<Integer>(); 631 boolean blockChanged = false; 632 for(int blk=0;blk<blockNum;blk++) { 633 int currLen=1; 634 for(int pos=1;pos<optLen[blk];pos++) { 635 if( optAln[blk][0][pos] <= optAln[blk][0][pos-1] 636 || optAln[blk][1][pos] <= optAln[blk][1][pos-1] ) 637 { 638 //start a new block 639 newBlkLen.add(currLen); 640 currLen = 0; 641 blockChanged = true; 642 } 643 currLen++; 644 } 645 if(optLen[blk] < 2 ) { 646 newBlkLen.add(optLen[blk]); 647 } else { 648 newBlkLen.add(currLen); 649 } 650 } 651 652 // Check if anything needs to be split 653 if( !blockChanged ) { 654 return a; 655 } 656 657 // Split blocks 658 List<int[][]> blocks = new ArrayList<int[][]>( newBlkLen.size() ); 659 660 int oldBlk = 0; 661 int pos = 0; 662 for(int blkLen : newBlkLen) { 663 if( blkLen == optLen[oldBlk] ) { 664 assert(pos == 0); //should be the whole block 665 // Use the old block 666 blocks.add(optAln[oldBlk]); 667 } else { 668 int[][] newBlock = new int[2][blkLen]; 669 assert( pos+blkLen <= optLen[oldBlk] ); // don't overrun block 670 for(int i=0; i<blkLen;i++) { 671 newBlock[0][i] = optAln[oldBlk][0][pos + i]; 672 newBlock[1][i] = optAln[oldBlk][1][pos + i]; 673 } 674 pos += blkLen; 675 blocks.add(newBlock); 676 677 if( pos == optLen[oldBlk] ) { 678 // Finished this oldBlk, start the next 679 oldBlk++; 680 pos = 0; 681 } 682 } 683 } 684 685 // Store new blocks 686 int[][][] newOptAln = blocks.toArray(new int[0][][]); 687 int[] newBlockLens = new int[newBlkLen.size()]; 688 for(int i=0;i<newBlkLen.size();i++) { 689 newBlockLens[i] = newBlkLen.get(i); 690 } 691 692 return replaceOptAln(a, ca1, ca2, blocks.size(), newBlockLens, newOptAln); 693 } 694 695 /** 696 * It replaces an optimal alignment of an AFPChain and calculates all the new alignment scores and variables. 697 */ 698 public static AFPChain replaceOptAln(int[][][] newAlgn, AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws StructureException { 699 700 //The order is the number of groups in the newAlgn 701 int order = newAlgn.length; 702 703 //Calculate the alignment length from all the subunits lengths 704 int[] optLens = new int[order]; 705 for(int s=0;s<order;s++) { 706 optLens[s] = newAlgn[s][0].length; 707 } 708 int optLength = 0; 709 for(int s=0;s<order;s++) { 710 optLength += optLens[s]; 711 } 712 713 //Create a copy of the original AFPChain and set everything needed for the structure update 714 AFPChain copyAFP = (AFPChain) afpChain.clone(); 715 716 //Set the new parameters of the optimal alignment 717 copyAFP.setOptLength(optLength); 718 copyAFP.setOptLen(optLens); 719 copyAFP.setOptAln(newAlgn); 720 721 //Set the block information of the new alignment 722 copyAFP.setBlockNum(order); 723 copyAFP.setBlockSize(optLens); 724 copyAFP.setBlockResList(newAlgn); 725 copyAFP.setBlockResSize(optLens); 726 copyAFP.setBlockGap(calculateBlockGap(newAlgn)); 727 728 //Recalculate properties: superposition, tm-score, etc 729 Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); // don't modify ca2 positions 730 AlignmentTools.updateSuperposition(copyAFP, ca1, ca2clone); 731 732 //It re-does the sequence alignment strings from the OptAlgn information only 733 copyAFP.setAlnsymb(null); 734 AFPAlignmentDisplay.getAlign(copyAFP, ca1, ca2clone); 735 736 return copyAFP; 737 } 738 739 /** 740 * Takes an AFPChain and replaces the optimal alignment based on an alignment map 741 * 742 * <p>Parameters are filled with defaults (often null) or sometimes 743 * calculated. 744 * 745 * <p>For a way to create a new AFPChain, see 746 * {@link AlignmentTools#createAFPChain(Atom[], Atom[], ResidueNumber[], ResidueNumber[])} 747 * 748 * @param afpChain The alignment to be modified 749 * @param alignment The new alignment, as a Map 750 * @throws StructureException if an error occurred during superposition 751 * @see AlignmentTools#createAFPChain(Atom[], Atom[], ResidueNumber[], ResidueNumber[]) 752 */ 753 public static AFPChain replaceOptAln(AFPChain afpChain, Atom[] ca1, Atom[] ca2, 754 Map<Integer, Integer> alignment) throws StructureException { 755 756 // Determine block lengths 757 // Sort ca1 indices, then start a new block whenever ca2 indices aren't 758 // increasing monotonically. 759 Integer[] res1 = alignment.keySet().toArray(new Integer[0]); 760 Arrays.sort(res1); 761 List<Integer> blockLens = new ArrayList<Integer>(2); 762 int optLength = 0; 763 Integer lastRes = alignment.get(res1[0]); 764 int blkLen = lastRes==null?0:1; 765 for(int i=1;i<res1.length;i++) { 766 Integer currRes = alignment.get(res1[i]); //res2 index 767 assert(currRes != null);// could be converted to if statement if assertion doesn't hold; just modify below as well. 768 if(lastRes<currRes) { 769 blkLen++; 770 } else { 771 // CP! 772 blockLens.add(blkLen); 773 optLength+=blkLen; 774 blkLen = 1; 775 } 776 lastRes = currRes; 777 } 778 blockLens.add(blkLen); 779 optLength+=blkLen; 780 781 // Create array structure for alignment 782 int[][][] optAln = new int[blockLens.size()][][]; 783 int pos1 = 0; //index into res1 784 for(int blk=0;blk<blockLens.size();blk++) { 785 optAln[blk] = new int[2][]; 786 blkLen = blockLens.get(blk); 787 optAln[blk][0] = new int[blkLen]; 788 optAln[blk][1] = new int[blkLen]; 789 int pos = 0; //index into optAln 790 while(pos<blkLen) { 791 optAln[blk][0][pos]=res1[pos1]; 792 Integer currRes = alignment.get(res1[pos1]); 793 optAln[blk][1][pos]=currRes; 794 pos++; 795 pos1++; 796 } 797 } 798 assert(pos1 == optLength); 799 800 // Create length array 801 int[] optLens = new int[blockLens.size()]; 802 for(int i=0;i<blockLens.size();i++) { 803 optLens[i] = blockLens.get(i); 804 } 805 806 return replaceOptAln(afpChain, ca1, ca2, blockLens.size(), optLens, optAln); 807 } 808 809 /** 810 * @param afpChain Input afpchain. UNMODIFIED 811 * @param ca1 812 * @param ca2 813 * @param optLens 814 * @param optAln 815 * @return A NEW AfpChain based off the input but with the optAln modified 816 * @throws StructureException if an error occured during superposition 817 */ 818 public static AFPChain replaceOptAln(AFPChain afpChain, Atom[] ca1, Atom[] ca2, 819 int blockNum, int[] optLens, int[][][] optAln) throws StructureException { 820 int optLength = 0; 821 for( int blk=0;blk<blockNum;blk++) { 822 optLength += optLens[blk]; 823 } 824 825 //set everything 826 AFPChain refinedAFP = (AFPChain) afpChain.clone(); 827 refinedAFP.setOptLength(optLength); 828 refinedAFP.setBlockSize(optLens); 829 refinedAFP.setOptLen(optLens); 830 refinedAFP.setOptAln(optAln); 831 refinedAFP.setBlockNum(blockNum); 832 833 //TODO recalculate properties: superposition, tm-score, etc 834 Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); // don't modify ca2 positions 835 AlignmentTools.updateSuperposition(refinedAFP, ca1, ca2clone); 836 837 AFPAlignmentDisplay.getAlign(refinedAFP, ca1, ca2clone); 838 return refinedAFP; 839 } 840 841 842 /** 843 * After the alignment changes (optAln, optLen, blockNum, at a minimum), 844 * many other properties which depend on the superposition will be invalid. 845 * 846 * This method re-runs a rigid superposition over the whole alignment 847 * and repopulates the required properties, including RMSD (TotalRMSD) and 848 * TM-Score. 849 * @param afpChain 850 * @param ca1 851 * @param ca2 Second set of ca atoms. Will be modified based on the superposition 852 * @throws StructureException 853 * @see {@link CECalculator#calc_rmsd(Atom[], Atom[], int, boolean)} 854 * contains much of the same code, but stores results in a CECalculator 855 * instance rather than an AFPChain 856 */ 857 public static void updateSuperposition(AFPChain afpChain, Atom[] ca1, 858 Atom[] ca2) throws StructureException { 859 860 //Update ca information, because the atom array might also be changed 861 afpChain.setCa1Length(ca1.length); 862 afpChain.setCa2Length(ca2.length); 863 864 //We need this to get the correct superposition 865 int[] focusRes1 = afpChain.getFocusRes1(); 866 int[] focusRes2 = afpChain.getFocusRes2(); 867 if (focusRes1 == null) { 868 focusRes1 = new int[afpChain.getCa1Length()]; 869 afpChain.setFocusRes1(focusRes1); 870 } 871 if (focusRes2 == null) { 872 focusRes2 = new int[afpChain.getCa2Length()]; 873 afpChain.setFocusRes2(focusRes2); 874 } 875 876 if (afpChain.getNrEQR() == 0) return; 877 878 // create new arrays for the subset of atoms in the alignment. 879 Atom[] ca1aligned = new Atom[afpChain.getOptLength()]; 880 Atom[] ca2aligned = new Atom[afpChain.getOptLength()]; 881 882 fillAlignedAtomArrays(afpChain, ca1, ca2, ca1aligned, ca2aligned); 883 884 //Superimpose the two structures in correspondance to the new alignment 885 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(ca1aligned), 886 Calc.atomsToPoints(ca2aligned)); 887 888 Matrix matrix = Matrices.getRotationJAMA(trans); 889 Atom shift = Calc.getTranslationVector(trans); 890 891 Matrix[] blockMxs = new Matrix[afpChain.getBlockNum()]; 892 Arrays.fill(blockMxs, matrix); 893 afpChain.setBlockRotationMatrix(blockMxs); 894 Atom[] blockShifts = new Atom[afpChain.getBlockNum()]; 895 Arrays.fill(blockShifts, shift); 896 afpChain.setBlockShiftVector(blockShifts); 897 898 for (Atom a : ca2aligned) { 899 Calc.rotate(a, matrix); 900 Calc.shift(a, shift); 901 } 902 903 //Calculate the RMSD and TM score for the new alignment 904 double rmsd = Calc.rmsd(ca1aligned, ca2aligned); 905 double tmScore = Calc.getTMScore(ca1aligned, ca2aligned, ca1.length, ca2.length); 906 afpChain.setTotalRmsdOpt(rmsd); 907 afpChain.setTMScore(tmScore); 908 909 int[] blockLens = afpChain.getOptLen(); 910 int[][][] optAln = afpChain.getOptAln(); 911 912 //Calculate the RMSD and TM score for every block of the new alignment 913 double[] blockRMSD = new double[afpChain.getBlockNum()]; 914 double[] blockScore = new double[afpChain.getBlockNum()]; 915 for (int k=0; k<afpChain.getBlockNum(); k++){ 916 //Create the atom arrays corresponding to the aligned residues in the block 917 Atom[] ca1block = new Atom[afpChain.getOptLen()[k]]; 918 Atom[] ca2block = new Atom[afpChain.getOptLen()[k]]; 919 int position=0; 920 for(int i=0;i<blockLens[k];i++) { 921 int pos1 = optAln[k][0][i]; 922 int pos2 = optAln[k][1][i]; 923 Atom a1 = ca1[pos1]; 924 Atom a2 = (Atom) ca2[pos2].clone(); 925 ca1block[position] = a1; 926 ca2block[position] = a2; 927 position++; 928 } 929 if (position != afpChain.getOptLen()[k]){ 930 logger.warn("AFPChainScorer getTMScore: Problems reconstructing block alignment! nr of loaded atoms is " + position + " but should be " + afpChain.getOptLen()[k]); 931 // we need to resize the array, because we allocated too many atoms earlier on. 932 ca1block = (Atom[]) resizeArray(ca1block, position); 933 ca2block = (Atom[]) resizeArray(ca2block, position); 934 } 935 //Superimpose the two block structures 936 Matrix4d transb = SuperPositions.superpose(Calc.atomsToPoints(ca1block), 937 Calc.atomsToPoints(ca2block)); 938 939 blockMxs[k] = Matrices.getRotationJAMA(trans); 940 blockShifts[k] = Calc.getTranslationVector(trans); 941 942 Calc.transform(ca2block, transb); 943 944 //Calculate the RMSD and TM score for the block 945 double rmsdb = Calc.rmsd(ca1block, ca2block); 946 double tmScoreb = Calc.getTMScore(ca1block, ca2block, ca1.length, ca2.length); 947 blockRMSD[k] = rmsdb; 948 blockScore[k] = tmScoreb; 949 } 950 afpChain.setOptRmsd(blockRMSD); 951 afpChain.setBlockRmsd(blockRMSD); 952 afpChain.setBlockScore(blockScore); 953 } 954 955 /** 956 * Reallocates an array with a new size, and copies the contents 957 * of the old array to the new array. 958 * @param oldArray the old array, to be reallocated. 959 * @param newSize the new array size. 960 * @return A new array with the same contents. 961 */ 962 public static Object resizeArray (Object oldArray, int newSize) { 963 int oldSize = java.lang.reflect.Array.getLength(oldArray); 964 @SuppressWarnings("rawtypes") 965 Class elementType = oldArray.getClass().getComponentType(); 966 Object newArray = java.lang.reflect.Array.newInstance( 967 elementType,newSize); 968 int preserveLength = Math.min(oldSize,newSize); 969 if (preserveLength > 0) 970 System.arraycopy (oldArray,0,newArray,0,preserveLength); 971 return newArray; 972 } 973 974 /** 975 * Print an alignment map in a concise representation. Edges are given 976 * as two numbers separated by '>'. They are chained together where possible, 977 * or separated by spaces where disjoint or branched. 978 * 979 * <p>Note that more concise representations may be possible.</p> 980 * 981 * Examples: 982 * <li>1>2>3>1</li> 983 * <li>1>2>3>2 4>3</li> 984 * 985 * @param alignment The input function, as a map (see {@link AlignmentTools#alignmentAsMap(AFPChain)}) 986 * @param identity An identity-like function providing the isomorphism between 987 * the codomain of alignment (of type <T>) and the domain (type <S>). 988 * @return 989 */ 990 public static <S,T> String toConciseAlignmentString(Map<S,T> alignment, Map<T,S> identity) { 991 // Clone input to prevent changes 992 Map<S,T> alig = new HashMap<S,T>(alignment); 993 994 // Generate inverse alignment 995 Map<S,List<S>> inverse = new HashMap<S,List<S>>(); 996 for(Entry<S,T> e: alig.entrySet()) { 997 S val = identity.get(e.getValue()); 998 if( inverse.containsKey(val) ) { 999 List<S> l = inverse.get(val); 1000 l.add(e.getKey()); 1001 } else { 1002 List<S> l = new ArrayList<S>(); 1003 l.add(e.getKey()); 1004 inverse.put(val,l); 1005 } 1006 } 1007 1008 StringBuilder str = new StringBuilder(); 1009 1010 while(!alig.isEmpty()){ 1011 // Pick an edge and work upstream to a root or cycle 1012 S seedNode = alig.keySet().iterator().next(); 1013 S node = seedNode; 1014 if( inverse.containsKey(seedNode)) { 1015 node = inverse.get(seedNode).iterator().next(); 1016 while( node != seedNode && inverse.containsKey(node)) { 1017 node = inverse.get(node).iterator().next(); 1018 } 1019 } 1020 1021 // Now work downstream, deleting edges as we go 1022 seedNode = node; 1023 str.append(node); 1024 1025 while(alig.containsKey(node)) { 1026 S lastNode = node; 1027 node = identity.get( alig.get(lastNode) ); 1028 1029 // Output 1030 str.append('>'); 1031 str.append(node); 1032 1033 // Remove edge 1034 alig.remove(lastNode); 1035 List<S> inv = inverse.get(node); 1036 if(inv.size() > 1) { 1037 inv.remove(node); 1038 } else { 1039 inverse.remove(node); 1040 } 1041 } 1042 if(!alig.isEmpty()) { 1043 str.append(' '); 1044 } 1045 } 1046 1047 return str.toString(); 1048 } 1049 1050 /** 1051 * @see #toConciseAlignmentString(Map, Map) 1052 */ 1053 public static <T> String toConciseAlignmentString(Map<T, T> alignment) { 1054 return toConciseAlignmentString(alignment, new IdentityMap<T>()); 1055 } 1056 1057 /** 1058 * @see #toConciseAlignmentString(Map, Map) 1059 */ 1060 public static Map<Integer, Integer> fromConciseAlignmentString(String string) { 1061 Map<Integer, Integer> map = new HashMap<Integer, Integer>(); 1062 boolean matches = true; 1063 while (matches) { 1064 Pattern pattern = Pattern.compile("(\\d+)>(\\d+)"); 1065 Matcher matcher = pattern.matcher(string); 1066 matches = matcher.find(); 1067 if (matches) { 1068 Integer from = Integer.parseInt(matcher.group(1)); 1069 Integer to = Integer.parseInt(matcher.group(2)); 1070 map.put(from, to); 1071 string = string.substring(matcher.end(1) + 1); 1072 } 1073 } 1074 return map; 1075 } 1076 1077 /** 1078 * Method that calculates the number of gaps in each subunit block of an optimal AFP alignment. 1079 * 1080 * INPUT: an optimal alignment in the format int[][][]. 1081 * OUTPUT: an int[] array of <order> length containing the gaps in each block as int[block]. 1082 */ 1083 public static int[] calculateBlockGap(int[][][] optAln){ 1084 1085 //Initialize the array to be returned 1086 int [] blockGap = new int[optAln.length]; 1087 1088 //Loop for every block and look in both chains for non-contiguous residues. 1089 for (int i=0; i<optAln.length; i++){ 1090 int gaps = 0; //the number of gaps in that block 1091 int last1 = 0; //the last residue position in chain 1 1092 int last2 = 0; //the last residue position in chain 2 1093 //Loop for every position in the block 1094 for (int j=0; j<optAln[i][0].length; j++){ 1095 //If the first position is evaluated initialize the last positions 1096 if (j==0){ 1097 last1 = optAln[i][0][j]; 1098 last2 = optAln[i][1][j]; 1099 } 1100 else{ 1101 //If one of the positions or both are not contiguous increment the number of gaps 1102 if (optAln[i][0][j] > last1+1 || optAln[i][1][j] > last2+1){ 1103 gaps++; 1104 last1 = optAln[i][0][j]; 1105 last2 = optAln[i][1][j]; 1106 } 1107 //Otherwise just set the last position to the current one 1108 else{ 1109 last1 = optAln[i][0][j]; 1110 last2 = optAln[i][1][j]; 1111 } 1112 } 1113 } 1114 blockGap[i] = gaps; 1115 } 1116 return blockGap; 1117 } 1118 1119 /** 1120 * Creates a simple interaction format (SIF) file for an alignment. 1121 * 1122 * The SIF file can be read by network software (eg Cytoscape) to analyze 1123 * alignments as graphs. 1124 * 1125 * This function creates a graph with residues as nodes and two types of edges: 1126 * 1. backbone edges, which connect adjacent residues in the aligned protein 1127 * 2. alignment edges, which connect aligned residues 1128 * 1129 * @param out Stream to write to 1130 * @param afpChain alignment to write 1131 * @param ca1 First protein, used to generate node names 1132 * @param ca2 Second protein, used to generate node names 1133 * @param backboneInteraction Two-letter string used to identify backbone edges 1134 * @param alignmentInteraction Two-letter string used to identify alignment edges 1135 * @throws IOException 1136 */ 1137 public static void alignmentToSIF(Writer out,AFPChain afpChain, 1138 Atom[] ca1,Atom[] ca2, String backboneInteraction, 1139 String alignmentInteraction) throws IOException { 1140 1141 //out.write("Res1\tInteraction\tRes2\n"); 1142 String name1 = afpChain.getName1(); 1143 String name2 = afpChain.getName2(); 1144 if(name1==null) name1=""; else name1+=":"; 1145 if(name2==null) name2=""; else name2+=":"; 1146 1147 // Print alignment edges 1148 int nblocks = afpChain.getBlockNum(); 1149 int[] blockLen = afpChain.getOptLen(); 1150 int[][][] optAlign = afpChain.getOptAln(); 1151 for(int b=0;b<nblocks;b++) { 1152 for(int r=0;r<blockLen[b];r++) { 1153 int res1 = optAlign[b][0][r]; 1154 int res2 = optAlign[b][1][r]; 1155 1156 ResidueNumber rn1 = ca1[res1].getGroup().getResidueNumber(); 1157 ResidueNumber rn2 = ca2[res2].getGroup().getResidueNumber(); 1158 1159 String node1 = name1+rn1.getChainName()+rn1.toString(); 1160 String node2 = name2+rn2.getChainName()+rn2.toString(); 1161 1162 out.write(String.format("%s\t%s\t%s\n",node1, alignmentInteraction, node2)); 1163 } 1164 } 1165 1166 // Print first backbone edges 1167 ResidueNumber rn = ca1[0].getGroup().getResidueNumber(); 1168 String last = name1+rn.getChainName()+rn.toString(); 1169 for(int i=1;i<ca1.length;i++) { 1170 rn = ca1[i].getGroup().getResidueNumber(); 1171 String curr = name1+rn.getChainName()+rn.toString(); 1172 out.write(String.format("%s\t%s\t%s\n",last, backboneInteraction, curr)); 1173 last = curr; 1174 } 1175 1176 // Print second backbone edges, if the proteins differ 1177 // Do some quick checks for whether the proteins differ 1178 // (Not perfect, but should detect major differences and CPs.) 1179 if(!name1.equals(name2) || 1180 ca1.length!=ca2.length || 1181 (ca1.length>0 && ca1[0].getGroup()!=null && ca2[0].getGroup()!=null && 1182 !ca1[0].getGroup().getResidueNumber().equals(ca2[0].getGroup().getResidueNumber()) ) ) { 1183 rn = ca2[0].getGroup().getResidueNumber(); 1184 last = name2+rn.getChainName()+rn.toString(); 1185 for(int i=1;i<ca2.length;i++) { 1186 rn = ca2[i].getGroup().getResidueNumber(); 1187 String curr = name2+rn.getChainName()+rn.toString(); 1188 out.write(String.format("%s\t%s\t%s\n",last, backboneInteraction, curr)); 1189 last = curr; 1190 } 1191 } 1192 } 1193 1194 1195 1196 /** get an artificial List of chains containing the Atoms and groups. 1197 * Does NOT rotate anything. 1198 * @param ca 1199 * @return a list of Chains that is built up from the Atoms in the ca array 1200 * @throws StructureException 1201 */ 1202 public static final List<Chain> getAlignedModel(Atom[] ca){ 1203 1204 List<Chain> model = new ArrayList<Chain>(); 1205 for ( Atom a: ca){ 1206 1207 Group g = a.getGroup(); 1208 Chain parentC = g.getChain(); 1209 1210 Chain newChain = null; 1211 for ( Chain c : model) { 1212 if ( c.getId().equals(parentC.getId())){ 1213 newChain = c; 1214 break; 1215 } 1216 } 1217 if ( newChain == null){ 1218 1219 newChain = new ChainImpl(); 1220 1221 newChain.setId(parentC.getId()); 1222 1223 model.add(newChain); 1224 } 1225 1226 newChain.addGroup(g); 1227 1228 } 1229 1230 return model; 1231 } 1232 1233 1234 /** Get an artifical Structure containing both chains. 1235 * Does NOT rotate anything 1236 * @param ca1 1237 * @param ca2 1238 * @return a structure object containing two models, one for each set of Atoms. 1239 * @throws StructureException 1240 */ 1241 public static final Structure getAlignedStructure(Atom[] ca1, Atom[] ca2) throws StructureException{ 1242 1243 /* Previous implementation commented 1244 1245 Structure s = new StructureImpl(); 1246 1247 1248 List<Chain>model1 = getAlignedModel(ca1); 1249 List<Chain>model2 = getAlignedModel(ca2); 1250 s.addModel(model1); 1251 s.addModel(model2); 1252 1253 return s;*/ 1254 1255 Structure s = new StructureImpl(); 1256 1257 List<Chain>model1 = getAlignedModel(ca1); 1258 s.addModel(model1); 1259 List<Chain> model2 = getAlignedModel(ca2); 1260 s.addModel(model2); 1261 1262 return s; 1263 } 1264 1265 /** Rotate the Atoms/Groups so they are aligned for the 3D visualisation 1266 * 1267 * @param afpChain 1268 * @param ca1 1269 * @param ca2 1270 * @return an array of Groups that are transformed for 3D display 1271 * @throws StructureException 1272 */ 1273 public static Group[] prepareGroupsForDisplay(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws StructureException{ 1274 1275 1276 if ( afpChain.getBlockRotationMatrix().length == 0 ) { 1277 // probably the alignment is too short! 1278 System.err.println("No rotation matrix found to rotate 2nd structure!"); 1279 afpChain.setBlockRotationMatrix(new Matrix[]{Matrix.identity(3, 3)}); 1280 afpChain.setBlockShiftVector(new Atom[]{new AtomImpl()}); 1281 } 1282 1283 // List of groups to be rotated according to the alignment 1284 Group[] twistedGroups = new Group[ ca2.length]; 1285 1286 //int blockNum = afpChain.getBlockNum(); 1287 1288 int i = -1; 1289 1290 // List of groups from the structure not included in ca2 (e.g. ligands) 1291 // Will be rotated according to first block 1292 List<Group> hetatms2 = StructureTools.getUnalignedGroups(ca2); 1293 1294 if ( (afpChain.getAlgorithmName().equals(FatCatRigid.algorithmName) ) || (afpChain.getAlgorithmName().equals(FatCatFlexible.algorithmName) ) ){ 1295 1296 for (Atom a: ca2){ 1297 i++; 1298 twistedGroups[i]=a.getGroup(); 1299 1300 } 1301 1302 twistedGroups = AFPTwister.twistOptimized(afpChain, ca1, ca2); 1303 1304 //} else if (( blockNum == 1 ) || (afpChain.getAlgorithmName().equals(CeCPMain.algorithmName))) { 1305 } else { 1306 1307 Matrix m = afpChain.getBlockRotationMatrix()[ 0]; 1308 Atom shift = afpChain.getBlockShiftVector() [ 0 ]; 1309 1310 shiftCA2(afpChain, ca2, m,shift, twistedGroups); 1311 1312 } 1313 1314 if ( afpChain.getBlockNum() > 0){ 1315 1316 // Superimpose ligands relative to the first block 1317 if( hetatms2.size() > 0 ) { 1318 1319 if ( afpChain.getBlockRotationMatrix().length > 0 ) { 1320 1321 Matrix m1 = afpChain.getBlockRotationMatrix()[0]; 1322 //m1.print(3,3); 1323 Atom vector1 = afpChain.getBlockShiftVector()[0]; 1324 //System.out.println("shift vector:" + vector1); 1325 1326 for ( Group g : hetatms2){ 1327 Calc.rotate(g, m1); 1328 Calc.shift(g,vector1); 1329 } 1330 } 1331 } 1332 } 1333 1334 return twistedGroups; 1335 } 1336 1337 /** only shift CA positions. 1338 * 1339 */ 1340 public static void shiftCA2(AFPChain afpChain, Atom[] ca2, Matrix m, Atom shift, Group[] twistedGroups) { 1341 1342 int i = -1; 1343 for (Atom a: ca2){ 1344 i++; 1345 Group g = a.getGroup(); 1346 1347 Calc.rotate(g,m); 1348 Calc.shift(g, shift); 1349 1350 if (g.hasAltLoc()){ 1351 for (Group alt: g.getAltLocs()){ 1352 for (Atom alta : alt.getAtoms()){ 1353 if ( g.getAtoms().contains(alta)) 1354 continue; 1355 Calc.rotate(alta,m); 1356 Calc.shift(alta,shift); 1357 } 1358 } 1359 } 1360 twistedGroups[i]=g; 1361 } 1362 } 1363 1364 /** 1365 * Fill the aligned Atom arrays with the equivalent residues in the afpChain. 1366 * @param afpChain 1367 * @param ca1 1368 * @param ca2 1369 * @param ca1aligned 1370 * @param ca2aligned 1371 */ 1372 public static void fillAlignedAtomArrays(AFPChain afpChain, Atom[] ca1, 1373 Atom[] ca2, Atom[] ca1aligned, Atom[] ca2aligned) { 1374 1375 int pos=0; 1376 int[] blockLens = afpChain.getOptLen(); 1377 int[][][] optAln = afpChain.getOptAln(); 1378 assert(afpChain.getBlockNum() <= optAln.length); 1379 1380 for (int block=0; block < afpChain.getBlockNum(); block++) { 1381 for(int i=0;i<blockLens[block];i++) { 1382 int pos1 = optAln[block][0][i]; 1383 int pos2 = optAln[block][1][i]; 1384 Atom a1 = ca1[pos1]; 1385 Atom a2 = (Atom) ca2[pos2].clone(); 1386 ca1aligned[pos] = a1; 1387 ca2aligned[pos] = a2; 1388 pos++; 1389 } 1390 } 1391 1392 // this can happen when we load an old XML serialization which did not support modern ChemComp representation of modified residues. 1393 if (pos != afpChain.getOptLength()){ 1394 logger.warn("AFPChainScorer getTMScore: Problems reconstructing alignment! nr of loaded atoms is " + pos + " but should be " + afpChain.getOptLength()); 1395 // we need to resize the array, because we allocated too many atoms earlier on. 1396 ca1aligned = (Atom[]) resizeArray(ca1aligned, pos); 1397 ca2aligned = (Atom[]) resizeArray(ca2aligned, pos); 1398 } 1399 1400 } 1401 1402 /** 1403 * Find the alignment position with the highest atomic distance between the 1404 * equivalent atomic positions of the arrays and remove it from the 1405 * alignment. 1406 * 1407 * @param afpChain 1408 * original alignment, will be modified 1409 * @param ca1 1410 * atom array, will not be modified 1411 * @param ca2 1412 * atom array, will not be modified 1413 * @return the original alignment, with the alignment position at the 1414 * highest distance removed 1415 * @throws StructureException 1416 */ 1417 public static AFPChain deleteHighestDistanceColumn(AFPChain afpChain, 1418 Atom[] ca1, Atom[] ca2) throws StructureException { 1419 1420 int[][][] optAln = afpChain.getOptAln(); 1421 1422 int maxBlock = 0; 1423 int maxPos = 0; 1424 double maxDistance = Double.MIN_VALUE; 1425 1426 for (int b = 0; b < optAln.length; b++) { 1427 for (int p = 0; p < optAln[b][0].length; p++) { 1428 Atom ca2clone = ca2[optAln[b][1][p]]; 1429 Calc.rotate(ca2clone, afpChain.getBlockRotationMatrix()[b]); 1430 Calc.shift(ca2clone, afpChain.getBlockShiftVector()[b]); 1431 1432 double distance = Calc.getDistance(ca1[optAln[b][0][p]], 1433 ca2clone); 1434 if (distance > maxDistance) { 1435 maxBlock = b; 1436 maxPos = p; 1437 maxDistance = distance; 1438 } 1439 } 1440 } 1441 1442 return deleteColumn(afpChain, ca1, ca2, maxBlock, maxPos); 1443 } 1444 1445 /** 1446 * Delete an alignment position from the original alignment object. 1447 * 1448 * @param afpChain 1449 * original alignment, will be modified 1450 * @param ca1 1451 * atom array, will not be modified 1452 * @param ca2 1453 * atom array, will not be modified 1454 * @param block 1455 * block of the alignment position 1456 * @param pos 1457 * position index in the block 1458 * @return the original alignment, with the alignment position removed 1459 * @throws StructureException 1460 */ 1461 public static AFPChain deleteColumn(AFPChain afpChain, Atom[] ca1, 1462 Atom[] ca2, int block, int pos) throws StructureException { 1463 1464 // Check validity of the inputs 1465 if (afpChain.getBlockNum() <= block) { 1466 throw new IndexOutOfBoundsException(String.format( 1467 "Block index requested (%d) is higher than the total number of AFPChain blocks (%d).", 1468 block, afpChain.getBlockNum())); 1469 } 1470 if (afpChain.getOptAln()[block][0].length <= pos) { 1471 throw new IndexOutOfBoundsException(String.format( 1472 "Position index requested (%d) is higher than the total number of aligned position in the AFPChain block (%d).", 1473 block, afpChain.getBlockSize()[block])); 1474 } 1475 1476 int[][][] optAln = afpChain.getOptAln(); 1477 1478 int[] newPos0 = new int[optAln[block][0].length - 1]; 1479 int[] newPos1 = new int[optAln[block][1].length - 1]; 1480 1481 int position = 0; 1482 for (int p = 0; p < optAln[block][0].length; p++) { 1483 1484 if (p == pos) 1485 continue; 1486 1487 newPos0[position] = optAln[block][0][p]; 1488 newPos1[position] = optAln[block][1][p]; 1489 1490 position++; 1491 } 1492 1493 optAln[block][0] = newPos0; 1494 optAln[block][1] = newPos1; 1495 1496 return AlignmentTools.replaceOptAln(optAln, afpChain, ca1, ca2); 1497 } 1498}