001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Mar 9, 2010
021 * Author: Spencer Bliven
022 *
023 */
024
025package org.biojava.nbio.structure.align.ce;
026
027
028import org.biojava.nbio.structure.*;
029import org.biojava.nbio.structure.align.model.AFPChain;
030import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
031import org.biojava.nbio.structure.align.util.ConfigurationException;
032import org.biojava.nbio.structure.geometry.Matrices;
033import org.biojava.nbio.structure.geometry.SuperPositions;
034import org.biojava.nbio.structure.jama.Matrix;
035
036import java.lang.reflect.InvocationTargetException;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.List;
040
041import javax.vecmath.Matrix4d;
042
043/**
044 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding
045 * circular permutations.
046 * <p>
047 * A circular permutation consists of a single cleavage point and rearrangement
048 * between two structures, for example:
049 * <pre>
050 * ABCDEFG
051 * DEFGABC
052 * </pre>
053 * @author Spencer Bliven.
054 *
055 */
056public class CeCPMain extends CeMain {
057        private static boolean debug = false;
058
059        public static final String algorithmName = "jCE Circular Permutation";
060
061        /**
062         *  version history:
063         *  1.5 - Added more parameters to the command line, including -maxOptRMSD
064         *  1.4 - Added DuplicationHint parameter & default to duplicating the shorter chain
065         *  1.3 - Short CPs are now discarded
066         *  1.2 - now supports check AlignmentTools.isSequentialAlignment. XML protocol
067         *  1.1 - skipped, (trying to avoid confusion with jfatcat in all vs. all comparisons)
068         *  1.0 - initial release
069         */
070        public static final String version = "1.5";
071
072        public CeCPMain(){
073                super();
074                params = new CECPParameters();
075        }
076
077        @Override
078        public String getAlgorithmName() {
079                return CeCPMain.algorithmName;
080        }
081
082        @Override
083        public String getVersion() {
084                return CeCPMain.version;
085        }
086
087        public static void main(String[] args) throws ConfigurationException {
088                CeCPUserArgumentProcessor processor = new CeCPUserArgumentProcessor(); //Responsible for creating a CeCPMain instance
089                processor.process(args);
090        }
091
092
093        /**
094         * Aligns ca1 and ca2 using a heuristic to check for CPs.
095         * <p>
096         * Aligns ca1 against a doubled ca2, then cleans up the alignment.
097         * @param ca1
098         * @param ca2
099         * @param param
100         * @return the alignment, possibly containing a CP.
101         * @throws StructureException
102         */
103        @Override
104        public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) throws StructureException{
105                if ( ! (param instanceof CECPParameters))
106                        throw new IllegalArgumentException("CE algorithm needs an object of call CeParameters as argument.");
107
108                CECPParameters cpparams = (CECPParameters) param;
109                this.params = cpparams;
110
111                boolean duplicateRight;
112
113                switch( cpparams.getDuplicationHint() ) {
114                case LEFT:
115                        duplicateRight = false;
116                        break;
117                case RIGHT:
118                        duplicateRight = true;
119                        break;
120                case SHORTER:
121                        duplicateRight = ca1.length >= ca2.length;
122                        break;
123                default:
124                        duplicateRight = true;
125                }
126
127
128                if( duplicateRight ) {
129                        return alignRight(ca1, ca2, cpparams);
130                } else {
131                        if(debug) {
132                                System.out.println("Swapping alignment order.");
133                        }
134                        AFPChain afpChain = this.alignRight(ca2, ca1, cpparams);
135                        return invertAlignment(afpChain);
136                }
137        }
138
139        /**
140         * Aligns the structures, duplicating ca2 regardless of
141         * {@link CECPParameters.getDuplicationHint() param.getDuplicationHint}.
142         * @param ca1
143         * @param ca2
144         * @param cpparams
145         * @return
146         * @throws StructureException
147         */
148        private AFPChain alignRight(Atom[] ca1, Atom[] ca2, CECPParameters cpparams)
149                        throws StructureException {
150                long startTime = System.currentTimeMillis();
151
152                Atom[] ca2m = StructureTools.duplicateCA2(ca2);
153
154                if(debug) {
155                        System.out.format("Duplicating ca2 took %s ms\n",System.currentTimeMillis()-startTime);
156                        startTime = System.currentTimeMillis();
157                }
158
159                // Do alignment
160                AFPChain afpChain = super.align(ca1, ca2m,params);
161
162                // since the process of creating ca2m strips the name info away, set it explicitely
163                try {
164                        afpChain.setName2(ca2[0].getGroup().getChain().getStructure().getName());
165                } catch( Exception e) {}
166
167                if(debug) {
168                        System.out.format("Running %dx2*%d alignment took %s ms\n",ca1.length,ca2.length,System.currentTimeMillis()-startTime);
169                        startTime = System.currentTimeMillis();
170                }
171                afpChain = postProcessAlignment(afpChain, ca1, ca2m, calculator, cpparams);
172
173                if(debug) {
174                        System.out.format("Finding CP point took %s ms\n",System.currentTimeMillis()-startTime);
175                        startTime = System.currentTimeMillis();
176                }
177
178                return afpChain;
179        }
180
181
182
183        /** Circular permutation specific code to be run after the standard CE alignment
184         *
185         * @param afpChain The finished alignment
186         * @param ca1 CA atoms of the first protein
187         * @param ca2m A duplicated copy of the second protein
188         * @param calculator The CECalculator used to create afpChain
189         * @throws StructureException
190         */
191        public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator ) throws StructureException{
192                return postProcessAlignment(afpChain, ca1, ca2m, calculator, null);
193        }
194
195        /** Circular permutation specific code to be run after the standard CE alignment
196         *
197         * @param afpChain The finished alignment
198         * @param ca1 CA atoms of the first protein
199         * @param ca2m A duplicated copy of the second protein
200         * @param calculator The CECalculator used to create afpChain
201         * @param param Parameters
202         * @throws StructureException
203         */
204        public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator, CECPParameters param ) throws StructureException{
205
206                // remove bottom half of the matrix
207                Matrix doubledMatrix = afpChain.getDistanceMatrix();
208
209                // the matrix can be null if the alignment is too short.
210                if ( doubledMatrix != null ) {
211                        assert(doubledMatrix.getRowDimension() == ca1.length);
212                        assert(doubledMatrix.getColumnDimension() == ca2m.length);
213
214                        Matrix singleMatrix = doubledMatrix.getMatrix(0, ca1.length-1, 0, (ca2m.length/2)-1);
215                        assert(singleMatrix.getRowDimension() == ca1.length);
216                        assert(singleMatrix.getColumnDimension() == (ca2m.length/2));
217
218                        afpChain.setDistanceMatrix(singleMatrix);
219                }
220                // Check for circular permutations
221                int alignLen = afpChain.getOptLength();
222                if ( alignLen > 0) {
223                        afpChain = filterDuplicateAFPs(afpChain,calculator,ca1,ca2m,param);
224                }
225                return afpChain;
226        }
227
228        /**
229         * Swaps the order of structures in an AFPChain
230         * @param a
231         * @return
232         */
233        public AFPChain invertAlignment(AFPChain a) {
234                String name1 = a.getName1();
235                String name2 = a.getName2();
236                a.setName1(name2);
237                a.setName2(name1);
238
239                int len1 = a.getCa1Length();
240                a.setCa1Length( a.getCa2Length() );
241                a.setCa2Length( len1 );
242
243                int beg1 = a.getAlnbeg1();
244                a.setAlnbeg1(a.getAlnbeg2());
245                a.setAlnbeg2(beg1);
246
247                char[] alnseq1 = a.getAlnseq1();
248                a.setAlnseq1(a.getAlnseq2());
249                a.setAlnseq2(alnseq1);
250
251                Matrix distab1 = a.getDisTable1();
252                a.setDisTable1(a.getDisTable2());
253                a.setDisTable2(distab1);
254
255                int[] focusRes1 = a.getFocusRes1();
256                a.setFocusRes1(a.getFocusRes2());
257                a.setFocusRes2(focusRes1);
258
259                //What are aftIndex and befIndex used for? How are they indexed?
260                //a.getAfpAftIndex()
261
262
263                String[][][] pdbAln = a.getPdbAln();
264                if( pdbAln != null) {
265                        for(int block = 0; block < a.getBlockNum(); block++) {
266                                String[] paln1 = pdbAln[block][0];
267                                pdbAln[block][0] = pdbAln[block][1];
268                                pdbAln[block][1] = paln1;
269                        }
270                }
271
272                int[][][] optAln = a.getOptAln();
273                if( optAln != null ) {
274                        for(int block = 0; block < a.getBlockNum(); block++) {
275                                int[] aln1 = optAln[block][0];
276                                optAln[block][0] = optAln[block][1];
277                                optAln[block][1] = aln1;
278                        }
279                }
280                a.setOptAln(optAln); // triggers invalidate()
281
282                Matrix distmat = a.getDistanceMatrix();
283                if(distmat != null)
284                        a.setDistanceMatrix(distmat.transpose());
285
286
287                // invert the rotation matrices
288                Matrix[] blockRotMat = a.getBlockRotationMatrix();
289                Atom[] shiftVec = a.getBlockShiftVector();
290                if( blockRotMat != null) {
291                        for(int block = 0; block < a.getBlockNum(); block++) {
292                                if(blockRotMat[block] != null) {
293                                        // if y=x*A+b, then x=y*inv(A)-b*inv(A)
294                                        blockRotMat[block] = blockRotMat[block].inverse();
295
296                                        Calc.rotate(shiftVec[block],blockRotMat[block]);
297                                        shiftVec[block] = Calc.invert(shiftVec[block]);
298                                }
299                        }
300                }
301
302                return a;
303        }
304
305        /**
306         * Takes as input an AFPChain where ca2 has been artificially duplicated.
307         * This raises the possibility that some residues of ca2 will appear in
308         * multiple AFPs. This method filters out duplicates and makes sure that
309         * all AFPs are numbered relative to the original ca2.
310         *
311         * <p>The current version chooses a CP site such that the length of the
312         * alignment is maximized.
313         *
314         * <p>This method does <i>not</i> update scores to reflect the filtered alignment.
315         * It <i>does</i> update the RMSD and superposition.
316         *
317         * @param afpChain The alignment between ca1 and ca2-ca2. Blindly assumes
318         *  that ca2 has been duplicated.
319         * @return A new AFPChain consisting of ca1 to ca2, with each residue in
320         *  at most 1 AFP.
321         * @throws StructureException
322         */
323        public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, Atom[] ca1, Atom[] ca2duplicated) throws StructureException {
324                return filterDuplicateAFPs(afpChain, ceCalc, ca1, ca2duplicated, null);
325        }
326        public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc,
327                        Atom[] ca1, Atom[] ca2duplicated, CECPParameters params) throws StructureException {
328                AFPChain newAFPChain = new AFPChain(afpChain);
329
330                if(params == null)
331                        params = new CECPParameters();
332
333                final int minCPlength = params.getMinCPLength();
334
335
336                int ca2len = afpChain.getCa2Length()/2;
337                newAFPChain.setCa2Length(ca2len);
338
339                // Fix optimal alignment
340                int[][][] optAln = afpChain.getOptAln();
341                int[] optLen = afpChain.getOptLen();
342                int alignLen = afpChain.getOptLength();
343                if (alignLen < 1) return newAFPChain;
344
345                assert(afpChain.getBlockNum() == 1); // Assume that CE returns just one block
346
347
348                // Determine the region where ca2 and ca2' overlap
349
350                // The bounds of the alignment wrt ca2-ca2'
351                int nStart = optAln[0][1][0]; //alignment N-terminal
352                int cEnd = optAln[0][1][alignLen-1]; // alignment C-terminal
353                // overlap is between nStart and cEnd
354
355
356                int firstRes = nStart; // start res number after trimming
357                int lastRes = nStart+ca2len;  // last res number after trimming
358                if(nStart >= ca2len || cEnd < ca2len) { // no circular permutation
359                        firstRes=nStart;
360                        lastRes=cEnd;
361                } else {
362                        // Rule: maximize the length of the alignment
363
364                        int overlapLength = cEnd+1 - nStart - ca2len;
365                        if(overlapLength <= 0) {
366                                // no overlap!
367
368                                CPRange minCP = calculateMinCP(optAln[0][1], alignLen, ca2len, minCPlength);
369
370                                firstRes=nStart;
371                                lastRes=cEnd;
372
373                                // Remove short blocks
374                                if(firstRes > minCP.n) {
375                                        firstRes = ca2len;
376
377                                        if(debug) {
378                                                System.out.format("Discarding n-terminal block as too " +
379                                                                "short (%d residues, needs %d)\n",
380                                                                minCP.mid, minCPlength);
381                                        }
382                                }
383
384                                if(lastRes < minCP.c) {
385                                        lastRes = ca2len-1;
386
387                                        if(debug) {
388                                                System.out.format("Discarding c-terminal block as too " +
389                                                                "short (%d residues, needs %d)\n",
390                                                                optLen[0] - minCP.mid, minCPlength);
391                                        }
392                                }
393
394                        }
395                        else {
396                                // overlap!
397
398                                CutPoint cp = calculateCutPoint(optAln[0][1], nStart, cEnd,
399                                                overlapLength, alignLen, minCPlength, ca2len, firstRes);
400
401                                // Adjust alignment length for trimming
402                                //alignLen -= cp.numResiduesCut; //TODO inaccurate
403
404                                firstRes = cp.firstRes;
405                                lastRes = cp.lastRes;
406
407                                //TODO Now have CP site, and could do a NxM alignment for further optimization.
408                                // For now, does not appear to be worth the 50% increase in time
409
410                                //TODO Bug: scores need to be recalculated
411                        }
412                }
413
414
415
416                // Fix numbering:
417                // First, split up the atoms into left and right blocks
418                List< ResiduePair > left = new ArrayList<ResiduePair>(); // residues from left of duplication
419                List< ResiduePair > right = new ArrayList<ResiduePair>(); // residues from right of duplication
420
421                for(int i=0;i<optLen[0];i++) {
422                        if( optAln[0][1][i] >= firstRes && optAln[0][1][i] <= lastRes ) { // not trimmed
423                                if(optAln[0][1][i] < ca2len) { // in first half of ca2
424                                        left.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]));
425                                }
426                                else {
427                                        right.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]-ca2len));
428                                }
429                        }
430                }
431                //assert(left.size()+right.size() == alignLen);
432                alignLen = 0;
433
434                // Now we don't care about left/right, so just call them "blocks"
435                List<List<ResiduePair>> blocks = new ArrayList<List<ResiduePair>>(2);
436                if( !left.isEmpty() ) {
437                        blocks.add(left);
438                        alignLen += left.size();
439                }
440                if( !right.isEmpty()) {
441                        blocks.add(right);
442                        alignLen += right.size();
443                }
444                left=null; right = null;
445
446                // Put the blocks back together into arrays for the AFPChain
447                int[][][] newAlign = new int[blocks.size()][][];
448                int[] blockLengths = new int[blocks.size()];
449                for(int blockNum = 0; blockNum < blocks.size(); blockNum++) {
450                        //Alignment
451                        List<ResiduePair> block = blocks.get(blockNum);
452                        newAlign[blockNum] = new int[2][block.size()];
453                        for(int i=0;i<block.size();i++) {
454                                ResiduePair pair = block.get(i);
455                                newAlign[blockNum][0][i] = pair.a;
456                                newAlign[blockNum][1][i] = pair.b;
457                        }
458
459                        // Block lengths
460                        blockLengths[blockNum] = block.size();
461                }
462                // Set Alignment
463                newAFPChain.setOptAln(newAlign);
464                newAFPChain.setOptLen(blockLengths );
465                newAFPChain.setOptLength(alignLen);
466                newAFPChain.setBlockNum(blocks.size());
467                newAFPChain.setBlockResSize(blockLengths.clone());
468                newAFPChain.setSequentialAlignment(blocks.size() == 1);
469
470                // TODO make the AFPSet consistent
471                // TODO lots more block properties & old AFP properties
472
473                // Recalculate superposition
474                Atom[] atoms1 = new Atom[alignLen];
475                Atom[] atoms2 = new Atom[alignLen];
476
477                int pos=0;
478                for(List<ResiduePair> block:blocks ) {
479                        for(ResiduePair pair:block) {
480                                atoms1[pos] = ca1[pair.a];
481                                // Clone residue to allow modification
482                                Atom atom2 = ca2duplicated[pair.b];
483                                Group g = (Group) atom2.getGroup().clone();
484                                atoms2[pos] = g.getAtom( atom2.getName() );
485                                pos++;
486                        }
487                }
488                assert(pos == alignLen);
489
490                // Sets the rotation matrix in ceCalc to the proper value
491                double rmsd = -1;
492                double tmScore = 0.;
493                double[] blockRMSDs = new double[blocks.size()];
494                Matrix[] blockRotationMatrices = new Matrix[blocks.size()];
495                Atom[] blockShifts = new Atom[blocks.size()];
496
497                if(alignLen>0) {
498
499                        // superimpose
500                        Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(atoms1),
501                                        Calc.atomsToPoints(atoms2));
502
503                        Matrix matrix = Matrices.getRotationJAMA(trans);
504                        Atom shift = Calc.getTranslationVector(trans);
505
506                        for( Atom a : atoms2 )
507                                Calc.transform(a.getGroup(), trans);
508
509                        //and get overall rmsd
510                        rmsd = Calc.rmsd(atoms1, atoms2);
511                        tmScore = Calc.getTMScore(atoms1, atoms2, ca1.length, ca2len);
512
513                        // set all block rotations to the overall rotation
514                        // It's not well documented if this is the expected behavior, but
515                        // it seems to work.
516                        blockRotationMatrices[0] = matrix;
517                        blockShifts[0] = shift;
518                        blockRMSDs[0] = -1;
519
520                        for(int i=1;i<blocks.size();i++) {
521                                blockRMSDs[i] = -1; //TODO Recalculate for the FATCAT text format
522                                blockRotationMatrices[i] = (Matrix) blockRotationMatrices[0].clone();
523                                blockShifts[i] = (Atom) blockShifts[0].clone();
524                        }
525
526                }
527                newAFPChain.setOptRmsd(blockRMSDs);
528                newAFPChain.setBlockRmsd(blockRMSDs);
529                newAFPChain.setBlockRotationMatrix(blockRotationMatrices);
530                newAFPChain.setBlockShiftVector(blockShifts);
531                newAFPChain.setTotalRmsdOpt(rmsd);
532                newAFPChain.setTMScore( tmScore );
533
534                // Clean up remaining properties using the FatCat helper method
535                Atom[] ca2 = new Atom[ca2len];
536                System.arraycopy(ca2duplicated, 0, ca2, 0, ca2len);
537                AFPAlignmentDisplay.getAlign(newAFPChain, ca1, ca2duplicated);
538
539                return newAFPChain;
540        }
541
542        private static int[] countCtermResidues(int[] block, int blockLen,
543                        int cEnd, int overlapLength) {
544                int[] cTermResCount = new int[overlapLength+1]; // # res at or to the right of i within overlap
545                cTermResCount[overlapLength] = 0;
546                int alignPos = blockLen - 1;
547                for(int i=overlapLength-1;i>=0;i--) { // i starts at the c-term and increases to the left
548                        if(block[alignPos] == cEnd - overlapLength+1 + i) { // matches the aligned pair
549                                // the c-term contains the -ith overlapping residue
550                                cTermResCount[i] = cTermResCount[i+1]+1;
551                                alignPos--;
552                        } else {
553                                cTermResCount[i] = cTermResCount[i+1];
554                        }
555                }
556                return cTermResCount;
557        }
558
559        private static int[] countNtermResidues(int[] block, int nStart,
560                        int overlapLength) {
561                int[] nTermResCount = new int[overlapLength+1]; // increases monotonically
562                nTermResCount[0] = 0;
563                int alignPos = 0; // index of the next aligned pair
564
565                for(int i=1;i<=overlapLength;i++) {
566                        if(block[alignPos] == nStart + i-1 ) { // matches the aligned pair
567                                // the n-term contains the ith overlapping residue
568                                nTermResCount[i] = nTermResCount[i-1]+1;
569                                alignPos++;
570                        } else {
571                                nTermResCount[i] = nTermResCount[i-1];
572                        }
573                }
574                return nTermResCount;
575        }
576
577
578        /**
579         * A light class to store an alignment between two residues.
580         * @author Spencer Bliven
581         * @see #filterDuplicateAFPs()
582         */
583        private static class ResiduePair {
584                public int a;
585                public int b;
586                public ResiduePair(int a, int b) {
587                        this.a=a;
588                        this.b=b;
589                }
590                @Override
591                public String toString() {
592                        return a+":"+b;
593                }
594        }
595
596
597        /**
598         * Tiny wrapper for the disallowed regions of an alignment.
599         * @see CeCPMain#calculateMinCP(int[], int, int, int)
600         * @author Spencer Bliven
601         *
602         */
603        protected static class CPRange {
604                /**
605                 * last allowed n-term
606                 */
607                public int n;
608                /**
609                 * midpoint of the alignment
610                 */
611                public int mid;
612                /**
613                 * first allowed c-term
614                 */
615                public int c;
616        }
617
618        /**
619         * Finds the alignment index of the residues minCPlength before and after
620         * the duplication.
621         *
622         * @param block The permuted block being considered, generally optAln[0][1]
623         * @param blockLen The length of the block (in case extra memory was allocated in block)
624         * @param ca2len The length, in residues, of the protein specified by block
625         * @param minCPlength The minimum number of residues allowed for a CP
626         * @return a CPRange with the following components:
627         *  <dl><dt>n</dt><dd>Index into <code>block</code> of the residue such that
628         *      <code>minCPlength</code> residues remain to the end of <code>ca2len</code>,
629         *      or -1 if no residue fits that criterium.</dd>
630         *  <dt>mid</dt><dd>Index of the first residue higher than <code>ca2len</code>.</dd>
631         *  <dt>c</dt><dd>Index of <code>minCPlength</code>-th residue after ca2len,
632         *      or ca2len*2 if no residue fits that criterium.</dd>
633         *  </dl>
634         */
635        protected static CPRange calculateMinCP(int[] block, int blockLen, int ca2len, int minCPlength) {
636                CPRange range = new CPRange();
637
638                // Find the cut point within the alignment.
639                // Either returns the index i of the alignment such that block[i] == ca2len,
640                // or else returns -i-1 where block[i] is the first element > ca2len.
641                int middle = Arrays.binarySearch(block, ca2len);
642                if(middle < 0) {
643                        middle = -middle -1;
644                }
645                // Middle is now the first res after the duplication
646                range.mid = middle;
647
648                int minCPntermIndex = middle-minCPlength;
649                if(minCPntermIndex >= 0) {
650                        range.n = block[minCPntermIndex];
651                } else {
652                        range.n = -1;
653                }
654
655                int minCPctermIndex = middle+minCPlength-1;
656                if(minCPctermIndex < blockLen) {
657                        range.c = block[minCPctermIndex];
658                } else {
659                        range.c = ca2len*2;
660                }
661
662                // Stub:
663                // Best-case: assume all residues in the termini are aligned
664                //range.n = ca2len - minCPlength;
665                //range.c = ca2len + minCPlength-1;
666
667                return range;
668        }
669
670
671        private static class CutPoint {
672                public int numResiduesCut;
673                public int firstRes;
674                public int lastRes;
675        }
676
677        private static CutPoint calculateCutPoint(int[] block,int nStart, int cEnd,
678                        int overlapLength, int alignLen, int minCPlength, int ca2len, int firstRes) {
679
680                // We require at least minCPlength residues in a block.
681                //TODO calculate these explicitely based on the alignment
682
683                // The last valid n-term
684                CPRange minCP = calculateMinCP(block, alignLen, ca2len, minCPlength);
685                int minCPnterm = minCP.n;
686                // The first valid c-term
687                int minCPcterm = minCP.c;
688
689
690                // # res at or to the left of i within the overlap
691                int[] nTermResCount = countNtermResidues(block, nStart,
692                                overlapLength);
693
694                // Determine the position with the largest sum of lengths
695                int[] cTermResCount = countCtermResidues(block, alignLen,
696                                cEnd, overlapLength);
697
698                // Alignment length for a cut at the left of the overlap
699                int maxResCount=-1;
700                for(int i=0;i<=overlapLength;i++) { // i is the position of the CP within the overlap region
701                        // Calculate number of residues which remain after the CP
702                        int nRemain,cRemain;
703                        if(nStart+i <= minCPnterm) {
704                                nRemain = nTermResCount[overlapLength]-nTermResCount[i];
705                        } else {
706                                nRemain = 0;
707                        }
708                        if(cEnd-overlapLength+i >= minCPcterm) {
709                                cRemain = cTermResCount[0] - cTermResCount[i];
710                        } else {
711                                cRemain = 0;
712                        }
713
714                        // Look for the cut point which cuts off the minimum number of res
715                        if(nRemain + cRemain > maxResCount ) { // '>' biases towards keeping the n-term
716                                maxResCount = nRemain + cRemain;
717                                firstRes = nStart+ i;
718                        }
719                }
720
721                // Calculate the number of residues cut within the overlap
722                int numResiduesCut = nTermResCount[overlapLength]+cTermResCount[0]-maxResCount;
723
724                // Remove short blocks
725                if(firstRes > minCPnterm) {
726                        // update number of residues cut for those outside the overlap
727                        numResiduesCut += 0; //TODO
728
729                        firstRes = ca2len;
730                }
731
732                int lastRes = firstRes+ca2len-1;
733                if(lastRes < minCPcterm) {
734                        // update number of residues cut for those outside the overlap
735                        numResiduesCut += 0; //TODO
736
737                        lastRes = ca2len-1;
738                }
739
740
741                CutPoint cp = new CutPoint();
742                cp.firstRes=firstRes;
743                cp.numResiduesCut = numResiduesCut;
744                cp.lastRes = lastRes;
745
746                if(debug) {
747                        System.out.format("Found a CP at residue %d. Trimming %d aligned residues from %d-%d of block 0 and %d-%d of block 1.\n",
748                                        firstRes,cp.numResiduesCut,nStart,firstRes-1,firstRes, cEnd-ca2len);
749                }
750
751                return cp;
752        }
753
754
755        // try showing a GUI
756        // requires additional dependencies biojava-structure-gui and JmolApplet
757        // TODO dmyersturnbull: This should probably be in structure-gui
758        @SuppressWarnings("unused")
759        private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException {
760                Atom[] ca1clone = StructureTools.cloneAtomArray(ca1);
761                Atom[] ca2clone = StructureTools.cloneAtomArray(ca2);
762                if (! GuiWrapper.isGuiModuleInstalled()) {
763                        System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!");
764                        // display alignment in console
765                        System.out.println(afpChain.toCE(ca1clone, ca2clone));
766                } else {
767                        Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone);
768                        GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol);
769                }
770        }
771}