001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Mar 9, 2010 021 * Author: Spencer Bliven 022 * 023 */ 024 025package org.biojava.nbio.structure.align.ce; 026 027 028import org.biojava.nbio.structure.*; 029import org.biojava.nbio.structure.align.model.AFPChain; 030import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 031import org.biojava.nbio.structure.align.util.ConfigurationException; 032import org.biojava.nbio.structure.geometry.Matrices; 033import org.biojava.nbio.structure.geometry.SuperPositions; 034import org.biojava.nbio.structure.jama.Matrix; 035 036import java.lang.reflect.InvocationTargetException; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.List; 040 041import javax.vecmath.Matrix4d; 042 043/** 044 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding 045 * circular permutations. 046 * <p> 047 * A circular permutation consists of a single cleavage point and rearrangement 048 * between two structures, for example: 049 * <pre> 050 * ABCDEFG 051 * DEFGABC 052 * </pre> 053 * @author Spencer Bliven. 054 * 055 */ 056public class CeCPMain extends CeMain { 057 private static boolean debug = false; 058 059 public static final String algorithmName = "jCE Circular Permutation"; 060 061 /** 062 * version history: 063 * 1.5 - Added more parameters to the command line, including -maxOptRMSD 064 * 1.4 - Added DuplicationHint parameter & default to duplicating the shorter chain 065 * 1.3 - Short CPs are now discarded 066 * 1.2 - now supports check AlignmentTools.isSequentialAlignment. XML protocol 067 * 1.1 - skipped, (trying to avoid confusion with jfatcat in all vs. all comparisons) 068 * 1.0 - initial release 069 */ 070 public static final String version = "1.5"; 071 072 public CeCPMain(){ 073 super(); 074 params = new CECPParameters(); 075 } 076 077 @Override 078 public String getAlgorithmName() { 079 return CeCPMain.algorithmName; 080 } 081 082 @Override 083 public String getVersion() { 084 return CeCPMain.version; 085 } 086 087 public static void main(String[] args) throws ConfigurationException { 088 CeCPUserArgumentProcessor processor = new CeCPUserArgumentProcessor(); //Responsible for creating a CeCPMain instance 089 processor.process(args); 090 } 091 092 093 /** 094 * Aligns ca1 and ca2 using a heuristic to check for CPs. 095 * <p> 096 * Aligns ca1 against a doubled ca2, then cleans up the alignment. 097 * @param ca1 098 * @param ca2 099 * @param param 100 * @return the alignment, possibly containing a CP. 101 * @throws StructureException 102 */ 103 @Override 104 public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) throws StructureException{ 105 if ( ! (param instanceof CECPParameters)) 106 throw new IllegalArgumentException("CE algorithm needs an object of call CeParameters as argument."); 107 108 CECPParameters cpparams = (CECPParameters) param; 109 this.params = cpparams; 110 111 boolean duplicateRight; 112 113 switch( cpparams.getDuplicationHint() ) { 114 case LEFT: 115 duplicateRight = false; 116 break; 117 case RIGHT: 118 duplicateRight = true; 119 break; 120 case SHORTER: 121 duplicateRight = ca1.length >= ca2.length; 122 break; 123 default: 124 duplicateRight = true; 125 } 126 127 128 if( duplicateRight ) { 129 return alignRight(ca1, ca2, cpparams); 130 } else { 131 if(debug) { 132 System.out.println("Swapping alignment order."); 133 } 134 AFPChain afpChain = this.alignRight(ca2, ca1, cpparams); 135 return invertAlignment(afpChain); 136 } 137 } 138 139 /** 140 * Aligns the structures, duplicating ca2 regardless of 141 * {@link CECPParameters.getDuplicationHint() param.getDuplicationHint}. 142 * @param ca1 143 * @param ca2 144 * @param cpparams 145 * @return 146 * @throws StructureException 147 */ 148 private AFPChain alignRight(Atom[] ca1, Atom[] ca2, CECPParameters cpparams) 149 throws StructureException { 150 long startTime = System.currentTimeMillis(); 151 152 Atom[] ca2m = StructureTools.duplicateCA2(ca2); 153 154 if(debug) { 155 System.out.format("Duplicating ca2 took %s ms\n",System.currentTimeMillis()-startTime); 156 startTime = System.currentTimeMillis(); 157 } 158 159 // Do alignment 160 AFPChain afpChain = super.align(ca1, ca2m,params); 161 162 // since the process of creating ca2m strips the name info away, set it explicitely 163 try { 164 afpChain.setName2(ca2[0].getGroup().getChain().getStructure().getName()); 165 } catch( Exception e) {} 166 167 if(debug) { 168 System.out.format("Running %dx2*%d alignment took %s ms\n",ca1.length,ca2.length,System.currentTimeMillis()-startTime); 169 startTime = System.currentTimeMillis(); 170 } 171 afpChain = postProcessAlignment(afpChain, ca1, ca2m, calculator, cpparams); 172 173 if(debug) { 174 System.out.format("Finding CP point took %s ms\n",System.currentTimeMillis()-startTime); 175 startTime = System.currentTimeMillis(); 176 } 177 178 return afpChain; 179 } 180 181 182 183 /** Circular permutation specific code to be run after the standard CE alignment 184 * 185 * @param afpChain The finished alignment 186 * @param ca1 CA atoms of the first protein 187 * @param ca2m A duplicated copy of the second protein 188 * @param calculator The CECalculator used to create afpChain 189 * @throws StructureException 190 */ 191 public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator ) throws StructureException{ 192 return postProcessAlignment(afpChain, ca1, ca2m, calculator, null); 193 } 194 195 /** Circular permutation specific code to be run after the standard CE alignment 196 * 197 * @param afpChain The finished alignment 198 * @param ca1 CA atoms of the first protein 199 * @param ca2m A duplicated copy of the second protein 200 * @param calculator The CECalculator used to create afpChain 201 * @param param Parameters 202 * @throws StructureException 203 */ 204 public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator, CECPParameters param ) throws StructureException{ 205 206 // remove bottom half of the matrix 207 Matrix doubledMatrix = afpChain.getDistanceMatrix(); 208 209 // the matrix can be null if the alignment is too short. 210 if ( doubledMatrix != null ) { 211 assert(doubledMatrix.getRowDimension() == ca1.length); 212 assert(doubledMatrix.getColumnDimension() == ca2m.length); 213 214 Matrix singleMatrix = doubledMatrix.getMatrix(0, ca1.length-1, 0, (ca2m.length/2)-1); 215 assert(singleMatrix.getRowDimension() == ca1.length); 216 assert(singleMatrix.getColumnDimension() == (ca2m.length/2)); 217 218 afpChain.setDistanceMatrix(singleMatrix); 219 } 220 // Check for circular permutations 221 int alignLen = afpChain.getOptLength(); 222 if ( alignLen > 0) { 223 afpChain = filterDuplicateAFPs(afpChain,calculator,ca1,ca2m,param); 224 } 225 return afpChain; 226 } 227 228 /** 229 * Swaps the order of structures in an AFPChain 230 * @param a 231 * @return 232 */ 233 public AFPChain invertAlignment(AFPChain a) { 234 String name1 = a.getName1(); 235 String name2 = a.getName2(); 236 a.setName1(name2); 237 a.setName2(name1); 238 239 int len1 = a.getCa1Length(); 240 a.setCa1Length( a.getCa2Length() ); 241 a.setCa2Length( len1 ); 242 243 int beg1 = a.getAlnbeg1(); 244 a.setAlnbeg1(a.getAlnbeg2()); 245 a.setAlnbeg2(beg1); 246 247 char[] alnseq1 = a.getAlnseq1(); 248 a.setAlnseq1(a.getAlnseq2()); 249 a.setAlnseq2(alnseq1); 250 251 Matrix distab1 = a.getDisTable1(); 252 a.setDisTable1(a.getDisTable2()); 253 a.setDisTable2(distab1); 254 255 int[] focusRes1 = a.getFocusRes1(); 256 a.setFocusRes1(a.getFocusRes2()); 257 a.setFocusRes2(focusRes1); 258 259 //What are aftIndex and befIndex used for? How are they indexed? 260 //a.getAfpAftIndex() 261 262 263 String[][][] pdbAln = a.getPdbAln(); 264 if( pdbAln != null) { 265 for(int block = 0; block < a.getBlockNum(); block++) { 266 String[] paln1 = pdbAln[block][0]; 267 pdbAln[block][0] = pdbAln[block][1]; 268 pdbAln[block][1] = paln1; 269 } 270 } 271 272 int[][][] optAln = a.getOptAln(); 273 if( optAln != null ) { 274 for(int block = 0; block < a.getBlockNum(); block++) { 275 int[] aln1 = optAln[block][0]; 276 optAln[block][0] = optAln[block][1]; 277 optAln[block][1] = aln1; 278 } 279 } 280 a.setOptAln(optAln); // triggers invalidate() 281 282 Matrix distmat = a.getDistanceMatrix(); 283 if(distmat != null) 284 a.setDistanceMatrix(distmat.transpose()); 285 286 287 // invert the rotation matrices 288 Matrix[] blockRotMat = a.getBlockRotationMatrix(); 289 Atom[] shiftVec = a.getBlockShiftVector(); 290 if( blockRotMat != null) { 291 for(int block = 0; block < a.getBlockNum(); block++) { 292 if(blockRotMat[block] != null) { 293 // if y=x*A+b, then x=y*inv(A)-b*inv(A) 294 blockRotMat[block] = blockRotMat[block].inverse(); 295 296 Calc.rotate(shiftVec[block],blockRotMat[block]); 297 shiftVec[block] = Calc.invert(shiftVec[block]); 298 } 299 } 300 } 301 302 return a; 303 } 304 305 /** 306 * Takes as input an AFPChain where ca2 has been artificially duplicated. 307 * This raises the possibility that some residues of ca2 will appear in 308 * multiple AFPs. This method filters out duplicates and makes sure that 309 * all AFPs are numbered relative to the original ca2. 310 * 311 * <p>The current version chooses a CP site such that the length of the 312 * alignment is maximized. 313 * 314 * <p>This method does <i>not</i> update scores to reflect the filtered alignment. 315 * It <i>does</i> update the RMSD and superposition. 316 * 317 * @param afpChain The alignment between ca1 and ca2-ca2. Blindly assumes 318 * that ca2 has been duplicated. 319 * @return A new AFPChain consisting of ca1 to ca2, with each residue in 320 * at most 1 AFP. 321 * @throws StructureException 322 */ 323 public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, Atom[] ca1, Atom[] ca2duplicated) throws StructureException { 324 return filterDuplicateAFPs(afpChain, ceCalc, ca1, ca2duplicated, null); 325 } 326 public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, 327 Atom[] ca1, Atom[] ca2duplicated, CECPParameters params) throws StructureException { 328 AFPChain newAFPChain = new AFPChain(afpChain); 329 330 if(params == null) 331 params = new CECPParameters(); 332 333 final int minCPlength = params.getMinCPLength(); 334 335 336 int ca2len = afpChain.getCa2Length()/2; 337 newAFPChain.setCa2Length(ca2len); 338 339 // Fix optimal alignment 340 int[][][] optAln = afpChain.getOptAln(); 341 int[] optLen = afpChain.getOptLen(); 342 int alignLen = afpChain.getOptLength(); 343 if (alignLen < 1) return newAFPChain; 344 345 assert(afpChain.getBlockNum() == 1); // Assume that CE returns just one block 346 347 348 // Determine the region where ca2 and ca2' overlap 349 350 // The bounds of the alignment wrt ca2-ca2' 351 int nStart = optAln[0][1][0]; //alignment N-terminal 352 int cEnd = optAln[0][1][alignLen-1]; // alignment C-terminal 353 // overlap is between nStart and cEnd 354 355 356 int firstRes = nStart; // start res number after trimming 357 int lastRes = nStart+ca2len; // last res number after trimming 358 if(nStart >= ca2len || cEnd < ca2len) { // no circular permutation 359 firstRes=nStart; 360 lastRes=cEnd; 361 } else { 362 // Rule: maximize the length of the alignment 363 364 int overlapLength = cEnd+1 - nStart - ca2len; 365 if(overlapLength <= 0) { 366 // no overlap! 367 368 CPRange minCP = calculateMinCP(optAln[0][1], alignLen, ca2len, minCPlength); 369 370 firstRes=nStart; 371 lastRes=cEnd; 372 373 // Remove short blocks 374 if(firstRes > minCP.n) { 375 firstRes = ca2len; 376 377 if(debug) { 378 System.out.format("Discarding n-terminal block as too " + 379 "short (%d residues, needs %d)\n", 380 minCP.mid, minCPlength); 381 } 382 } 383 384 if(lastRes < minCP.c) { 385 lastRes = ca2len-1; 386 387 if(debug) { 388 System.out.format("Discarding c-terminal block as too " + 389 "short (%d residues, needs %d)\n", 390 optLen[0] - minCP.mid, minCPlength); 391 } 392 } 393 394 } 395 else { 396 // overlap! 397 398 CutPoint cp = calculateCutPoint(optAln[0][1], nStart, cEnd, 399 overlapLength, alignLen, minCPlength, ca2len, firstRes); 400 401 // Adjust alignment length for trimming 402 //alignLen -= cp.numResiduesCut; //TODO inaccurate 403 404 firstRes = cp.firstRes; 405 lastRes = cp.lastRes; 406 407 //TODO Now have CP site, and could do a NxM alignment for further optimization. 408 // For now, does not appear to be worth the 50% increase in time 409 410 //TODO Bug: scores need to be recalculated 411 } 412 } 413 414 415 416 // Fix numbering: 417 // First, split up the atoms into left and right blocks 418 List< ResiduePair > left = new ArrayList<ResiduePair>(); // residues from left of duplication 419 List< ResiduePair > right = new ArrayList<ResiduePair>(); // residues from right of duplication 420 421 for(int i=0;i<optLen[0];i++) { 422 if( optAln[0][1][i] >= firstRes && optAln[0][1][i] <= lastRes ) { // not trimmed 423 if(optAln[0][1][i] < ca2len) { // in first half of ca2 424 left.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i])); 425 } 426 else { 427 right.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]-ca2len)); 428 } 429 } 430 } 431 //assert(left.size()+right.size() == alignLen); 432 alignLen = 0; 433 434 // Now we don't care about left/right, so just call them "blocks" 435 List<List<ResiduePair>> blocks = new ArrayList<List<ResiduePair>>(2); 436 if( !left.isEmpty() ) { 437 blocks.add(left); 438 alignLen += left.size(); 439 } 440 if( !right.isEmpty()) { 441 blocks.add(right); 442 alignLen += right.size(); 443 } 444 left=null; right = null; 445 446 // Put the blocks back together into arrays for the AFPChain 447 int[][][] newAlign = new int[blocks.size()][][]; 448 int[] blockLengths = new int[blocks.size()]; 449 for(int blockNum = 0; blockNum < blocks.size(); blockNum++) { 450 //Alignment 451 List<ResiduePair> block = blocks.get(blockNum); 452 newAlign[blockNum] = new int[2][block.size()]; 453 for(int i=0;i<block.size();i++) { 454 ResiduePair pair = block.get(i); 455 newAlign[blockNum][0][i] = pair.a; 456 newAlign[blockNum][1][i] = pair.b; 457 } 458 459 // Block lengths 460 blockLengths[blockNum] = block.size(); 461 } 462 // Set Alignment 463 newAFPChain.setOptAln(newAlign); 464 newAFPChain.setOptLen(blockLengths ); 465 newAFPChain.setOptLength(alignLen); 466 newAFPChain.setBlockNum(blocks.size()); 467 newAFPChain.setBlockResSize(blockLengths.clone()); 468 newAFPChain.setSequentialAlignment(blocks.size() == 1); 469 470 // TODO make the AFPSet consistent 471 // TODO lots more block properties & old AFP properties 472 473 // Recalculate superposition 474 Atom[] atoms1 = new Atom[alignLen]; 475 Atom[] atoms2 = new Atom[alignLen]; 476 477 int pos=0; 478 for(List<ResiduePair> block:blocks ) { 479 for(ResiduePair pair:block) { 480 atoms1[pos] = ca1[pair.a]; 481 // Clone residue to allow modification 482 Atom atom2 = ca2duplicated[pair.b]; 483 Group g = (Group) atom2.getGroup().clone(); 484 atoms2[pos] = g.getAtom( atom2.getName() ); 485 pos++; 486 } 487 } 488 assert(pos == alignLen); 489 490 // Sets the rotation matrix in ceCalc to the proper value 491 double rmsd = -1; 492 double tmScore = 0.; 493 double[] blockRMSDs = new double[blocks.size()]; 494 Matrix[] blockRotationMatrices = new Matrix[blocks.size()]; 495 Atom[] blockShifts = new Atom[blocks.size()]; 496 497 if(alignLen>0) { 498 499 // superimpose 500 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(atoms1), 501 Calc.atomsToPoints(atoms2)); 502 503 Matrix matrix = Matrices.getRotationJAMA(trans); 504 Atom shift = Calc.getTranslationVector(trans); 505 506 for( Atom a : atoms2 ) 507 Calc.transform(a.getGroup(), trans); 508 509 //and get overall rmsd 510 rmsd = Calc.rmsd(atoms1, atoms2); 511 tmScore = Calc.getTMScore(atoms1, atoms2, ca1.length, ca2len); 512 513 // set all block rotations to the overall rotation 514 // It's not well documented if this is the expected behavior, but 515 // it seems to work. 516 blockRotationMatrices[0] = matrix; 517 blockShifts[0] = shift; 518 blockRMSDs[0] = -1; 519 520 for(int i=1;i<blocks.size();i++) { 521 blockRMSDs[i] = -1; //TODO Recalculate for the FATCAT text format 522 blockRotationMatrices[i] = (Matrix) blockRotationMatrices[0].clone(); 523 blockShifts[i] = (Atom) blockShifts[0].clone(); 524 } 525 526 } 527 newAFPChain.setOptRmsd(blockRMSDs); 528 newAFPChain.setBlockRmsd(blockRMSDs); 529 newAFPChain.setBlockRotationMatrix(blockRotationMatrices); 530 newAFPChain.setBlockShiftVector(blockShifts); 531 newAFPChain.setTotalRmsdOpt(rmsd); 532 newAFPChain.setTMScore( tmScore ); 533 534 // Clean up remaining properties using the FatCat helper method 535 Atom[] ca2 = new Atom[ca2len]; 536 System.arraycopy(ca2duplicated, 0, ca2, 0, ca2len); 537 AFPAlignmentDisplay.getAlign(newAFPChain, ca1, ca2duplicated); 538 539 return newAFPChain; 540 } 541 542 private static int[] countCtermResidues(int[] block, int blockLen, 543 int cEnd, int overlapLength) { 544 int[] cTermResCount = new int[overlapLength+1]; // # res at or to the right of i within overlap 545 cTermResCount[overlapLength] = 0; 546 int alignPos = blockLen - 1; 547 for(int i=overlapLength-1;i>=0;i--) { // i starts at the c-term and increases to the left 548 if(block[alignPos] == cEnd - overlapLength+1 + i) { // matches the aligned pair 549 // the c-term contains the -ith overlapping residue 550 cTermResCount[i] = cTermResCount[i+1]+1; 551 alignPos--; 552 } else { 553 cTermResCount[i] = cTermResCount[i+1]; 554 } 555 } 556 return cTermResCount; 557 } 558 559 private static int[] countNtermResidues(int[] block, int nStart, 560 int overlapLength) { 561 int[] nTermResCount = new int[overlapLength+1]; // increases monotonically 562 nTermResCount[0] = 0; 563 int alignPos = 0; // index of the next aligned pair 564 565 for(int i=1;i<=overlapLength;i++) { 566 if(block[alignPos] == nStart + i-1 ) { // matches the aligned pair 567 // the n-term contains the ith overlapping residue 568 nTermResCount[i] = nTermResCount[i-1]+1; 569 alignPos++; 570 } else { 571 nTermResCount[i] = nTermResCount[i-1]; 572 } 573 } 574 return nTermResCount; 575 } 576 577 578 /** 579 * A light class to store an alignment between two residues. 580 * @author Spencer Bliven 581 * @see #filterDuplicateAFPs() 582 */ 583 private static class ResiduePair { 584 public int a; 585 public int b; 586 public ResiduePair(int a, int b) { 587 this.a=a; 588 this.b=b; 589 } 590 @Override 591 public String toString() { 592 return a+":"+b; 593 } 594 } 595 596 597 /** 598 * Tiny wrapper for the disallowed regions of an alignment. 599 * @see CeCPMain#calculateMinCP(int[], int, int, int) 600 * @author Spencer Bliven 601 * 602 */ 603 protected static class CPRange { 604 /** 605 * last allowed n-term 606 */ 607 public int n; 608 /** 609 * midpoint of the alignment 610 */ 611 public int mid; 612 /** 613 * first allowed c-term 614 */ 615 public int c; 616 } 617 618 /** 619 * Finds the alignment index of the residues minCPlength before and after 620 * the duplication. 621 * 622 * @param block The permuted block being considered, generally optAln[0][1] 623 * @param blockLen The length of the block (in case extra memory was allocated in block) 624 * @param ca2len The length, in residues, of the protein specified by block 625 * @param minCPlength The minimum number of residues allowed for a CP 626 * @return a CPRange with the following components: 627 * <dl><dt>n</dt><dd>Index into <code>block</code> of the residue such that 628 * <code>minCPlength</code> residues remain to the end of <code>ca2len</code>, 629 * or -1 if no residue fits that criterium.</dd> 630 * <dt>mid</dt><dd>Index of the first residue higher than <code>ca2len</code>.</dd> 631 * <dt>c</dt><dd>Index of <code>minCPlength</code>-th residue after ca2len, 632 * or ca2len*2 if no residue fits that criterium.</dd> 633 * </dl> 634 */ 635 protected static CPRange calculateMinCP(int[] block, int blockLen, int ca2len, int minCPlength) { 636 CPRange range = new CPRange(); 637 638 // Find the cut point within the alignment. 639 // Either returns the index i of the alignment such that block[i] == ca2len, 640 // or else returns -i-1 where block[i] is the first element > ca2len. 641 int middle = Arrays.binarySearch(block, ca2len); 642 if(middle < 0) { 643 middle = -middle -1; 644 } 645 // Middle is now the first res after the duplication 646 range.mid = middle; 647 648 int minCPntermIndex = middle-minCPlength; 649 if(minCPntermIndex >= 0) { 650 range.n = block[minCPntermIndex]; 651 } else { 652 range.n = -1; 653 } 654 655 int minCPctermIndex = middle+minCPlength-1; 656 if(minCPctermIndex < blockLen) { 657 range.c = block[minCPctermIndex]; 658 } else { 659 range.c = ca2len*2; 660 } 661 662 // Stub: 663 // Best-case: assume all residues in the termini are aligned 664 //range.n = ca2len - minCPlength; 665 //range.c = ca2len + minCPlength-1; 666 667 return range; 668 } 669 670 671 private static class CutPoint { 672 public int numResiduesCut; 673 public int firstRes; 674 public int lastRes; 675 } 676 677 private static CutPoint calculateCutPoint(int[] block,int nStart, int cEnd, 678 int overlapLength, int alignLen, int minCPlength, int ca2len, int firstRes) { 679 680 // We require at least minCPlength residues in a block. 681 //TODO calculate these explicitely based on the alignment 682 683 // The last valid n-term 684 CPRange minCP = calculateMinCP(block, alignLen, ca2len, minCPlength); 685 int minCPnterm = minCP.n; 686 // The first valid c-term 687 int minCPcterm = minCP.c; 688 689 690 // # res at or to the left of i within the overlap 691 int[] nTermResCount = countNtermResidues(block, nStart, 692 overlapLength); 693 694 // Determine the position with the largest sum of lengths 695 int[] cTermResCount = countCtermResidues(block, alignLen, 696 cEnd, overlapLength); 697 698 // Alignment length for a cut at the left of the overlap 699 int maxResCount=-1; 700 for(int i=0;i<=overlapLength;i++) { // i is the position of the CP within the overlap region 701 // Calculate number of residues which remain after the CP 702 int nRemain,cRemain; 703 if(nStart+i <= minCPnterm) { 704 nRemain = nTermResCount[overlapLength]-nTermResCount[i]; 705 } else { 706 nRemain = 0; 707 } 708 if(cEnd-overlapLength+i >= minCPcterm) { 709 cRemain = cTermResCount[0] - cTermResCount[i]; 710 } else { 711 cRemain = 0; 712 } 713 714 // Look for the cut point which cuts off the minimum number of res 715 if(nRemain + cRemain > maxResCount ) { // '>' biases towards keeping the n-term 716 maxResCount = nRemain + cRemain; 717 firstRes = nStart+ i; 718 } 719 } 720 721 // Calculate the number of residues cut within the overlap 722 int numResiduesCut = nTermResCount[overlapLength]+cTermResCount[0]-maxResCount; 723 724 // Remove short blocks 725 if(firstRes > minCPnterm) { 726 // update number of residues cut for those outside the overlap 727 numResiduesCut += 0; //TODO 728 729 firstRes = ca2len; 730 } 731 732 int lastRes = firstRes+ca2len-1; 733 if(lastRes < minCPcterm) { 734 // update number of residues cut for those outside the overlap 735 numResiduesCut += 0; //TODO 736 737 lastRes = ca2len-1; 738 } 739 740 741 CutPoint cp = new CutPoint(); 742 cp.firstRes=firstRes; 743 cp.numResiduesCut = numResiduesCut; 744 cp.lastRes = lastRes; 745 746 if(debug) { 747 System.out.format("Found a CP at residue %d. Trimming %d aligned residues from %d-%d of block 0 and %d-%d of block 1.\n", 748 firstRes,cp.numResiduesCut,nStart,firstRes-1,firstRes, cEnd-ca2len); 749 } 750 751 return cp; 752 } 753 754 755 // try showing a GUI 756 // requires additional dependencies biojava-structure-gui and JmolApplet 757 // TODO dmyersturnbull: This should probably be in structure-gui 758 @SuppressWarnings("unused") 759 private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException { 760 Atom[] ca1clone = StructureTools.cloneAtomArray(ca1); 761 Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); 762 if (! GuiWrapper.isGuiModuleInstalled()) { 763 System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!"); 764 // display alignment in console 765 System.out.println(afpChain.toCE(ca1clone, ca2clone)); 766 } else { 767 Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone); 768 GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol); 769 } 770 } 771}