001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Mar 9, 2010
021 * Author: Spencer Bliven
022 *
023 */
024
025package org.biojava.nbio.structure.align.ce;
026
027
028import org.biojava.nbio.structure.*;
029import org.biojava.nbio.structure.align.model.AFPChain;
030import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
031import org.biojava.nbio.structure.align.util.ConfigurationException;
032import org.biojava.nbio.structure.jama.Matrix;
033
034import java.lang.reflect.InvocationTargetException;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038
039/**
040 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding
041 * circular permutations.
042 * <p>
043 * A circular permutation consists of a single cleavage point and rearrangement
044 * between two structures, for example:
045 * <pre>
046 * ABCDEFG
047 * DEFGABC
048 * </pre>
049 * @author Spencer Bliven.
050 *
051 */
052public class CeCPMain extends CeMain {
053        private static boolean debug = false;
054
055        public static final String algorithmName = "jCE Circular Permutation";
056
057        /**
058         *  version history:
059         *  1.5 - Added more parameters to the command line, including -maxOptRMSD
060         *  1.4 - Added DuplicationHint parameter & default to duplicating the shorter chain
061         *  1.3 - Short CPs are now discarded
062         *  1.2 - now supports check AlignmentTools.isSequentialAlignment. XML protocol
063         *  1.1 - skipped, (trying to avoid confusion with jfatcat in all vs. all comparisons)
064         *  1.0 - initial release
065         */
066        public static final String version = "1.5";
067
068        public CeCPMain(){
069                super();
070                params = new CECPParameters();
071        }
072
073        @Override
074        public String getAlgorithmName() {
075                return CeCPMain.algorithmName;
076        }
077
078        @Override
079        public String getVersion() {
080                return CeCPMain.version;
081        }
082
083        public static void main(String[] args) throws ConfigurationException {
084                CeCPUserArgumentProcessor processor = new CeCPUserArgumentProcessor(); //Responsible for creating a CeCPMain instance
085                processor.process(args);
086        }
087
088
089        /**
090         * Aligns ca1 and ca2 using a heuristic to check for CPs.
091         * <p>
092         * Aligns ca1 against a doubled ca2, then cleans up the alignment.
093         * @param ca1
094         * @param ca2
095         * @param param
096         * @return the alignment, possibly containing a CP.
097         * @throws StructureException
098         */
099        @Override
100        public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) throws StructureException{
101                if ( ! (param instanceof CECPParameters))
102                        throw new IllegalArgumentException("CE algorithm needs an object of call CeParameters as argument.");
103
104                CECPParameters cpparams = (CECPParameters) param;
105                this.params = cpparams;
106
107                boolean duplicateRight;
108
109                switch( cpparams.getDuplicationHint() ) {
110                case LEFT:
111                        duplicateRight = false;
112                        break;
113                case RIGHT:
114                        duplicateRight = true;
115                        break;
116                case SHORTER:
117                        duplicateRight = ca1.length >= ca2.length;
118                        break;
119                default:
120                        duplicateRight = true;
121                }
122
123
124                if( duplicateRight ) {
125                        return alignRight(ca1, ca2, cpparams);
126                } else {
127                        if(debug) {
128                                System.out.println("Swapping alignment order.");
129                        }
130                        AFPChain afpChain = this.alignRight(ca2, ca1, cpparams);
131                        return invertAlignment(afpChain);
132                }
133        }
134
135        /**
136         * Aligns the structures, duplicating ca2 regardless of
137         * {@link CECPParameters.getDuplicationHint() param.getDuplicationHint}.
138         * @param ca1
139         * @param ca2
140         * @param cpparams
141         * @return
142         * @throws StructureException
143         */
144        private AFPChain alignRight(Atom[] ca1, Atom[] ca2, CECPParameters cpparams)
145                        throws StructureException {
146                long startTime = System.currentTimeMillis();
147
148                Atom[] ca2m = StructureTools.duplicateCA2(ca2);
149
150                if(debug) {
151                        System.out.format("Duplicating ca2 took %s ms\n",System.currentTimeMillis()-startTime);
152                        startTime = System.currentTimeMillis();
153                }
154
155                // Do alignment
156                AFPChain afpChain = super.align(ca1, ca2m,params);
157
158                // since the process of creating ca2m strips the name info away, set it explicitely
159                try {
160                        afpChain.setName2(ca2[0].getGroup().getChain().getStructure().getName());
161                } catch( Exception e) {}
162
163                if(debug) {
164                        System.out.format("Running %dx2*%d alignment took %s ms\n",ca1.length,ca2.length,System.currentTimeMillis()-startTime);
165                        startTime = System.currentTimeMillis();
166                }
167                afpChain = postProcessAlignment(afpChain, ca1, ca2m, calculator, cpparams);
168
169                if(debug) {
170                        System.out.format("Finding CP point took %s ms\n",System.currentTimeMillis()-startTime);
171                        startTime = System.currentTimeMillis();
172                }
173
174                return afpChain;
175        }
176
177
178
179        /** Circular permutation specific code to be run after the standard CE alignment
180         *
181         * @param afpChain The finished alignment
182         * @param ca1 CA atoms of the first protein
183         * @param ca2m A duplicated copy of the second protein
184         * @param calculator The CECalculator used to create afpChain
185         * @throws StructureException
186         */
187        public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator ) throws StructureException{
188                return postProcessAlignment(afpChain, ca1, ca2m, calculator, null);
189        }
190
191        /** Circular permutation specific code to be run after the standard CE alignment
192         *
193         * @param afpChain The finished alignment
194         * @param ca1 CA atoms of the first protein
195         * @param ca2m A duplicated copy of the second protein
196         * @param calculator The CECalculator used to create afpChain
197         * @param param Parameters
198         * @throws StructureException
199         */
200        public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator, CECPParameters param ) throws StructureException{
201
202                // remove bottom half of the matrix
203                Matrix doubledMatrix = afpChain.getDistanceMatrix();
204
205                // the matrix can be null if the alignment is too short.
206                if ( doubledMatrix != null ) {
207                        assert(doubledMatrix.getRowDimension() == ca1.length);
208                        assert(doubledMatrix.getColumnDimension() == ca2m.length);
209
210                        Matrix singleMatrix = doubledMatrix.getMatrix(0, ca1.length-1, 0, (ca2m.length/2)-1);
211                        assert(singleMatrix.getRowDimension() == ca1.length);
212                        assert(singleMatrix.getColumnDimension() == (ca2m.length/2));
213
214                        afpChain.setDistanceMatrix(singleMatrix);
215                }
216                // Check for circular permutations
217                int alignLen = afpChain.getOptLength();
218                if ( alignLen > 0) {
219                        afpChain = filterDuplicateAFPs(afpChain,calculator,ca1,ca2m,param);
220                }
221                return afpChain;
222        }
223
224        /**
225         * Swaps the order of structures in an AFPChain
226         * @param a
227         * @return
228         */
229        public AFPChain invertAlignment(AFPChain a) {
230                String name1 = a.getName1();
231                String name2 = a.getName2();
232                a.setName1(name2);
233                a.setName2(name1);
234
235                int len1 = a.getCa1Length();
236                a.setCa1Length( a.getCa2Length() );
237                a.setCa2Length( len1 );
238
239                int beg1 = a.getAlnbeg1();
240                a.setAlnbeg1(a.getAlnbeg2());
241                a.setAlnbeg2(beg1);
242
243                char[] alnseq1 = a.getAlnseq1();
244                a.setAlnseq1(a.getAlnseq2());
245                a.setAlnseq2(alnseq1);
246
247                Matrix distab1 = a.getDisTable1();
248                a.setDisTable1(a.getDisTable2());
249                a.setDisTable2(distab1);
250
251                int[] focusRes1 = a.getFocusRes1();
252                a.setFocusRes1(a.getFocusRes2());
253                a.setFocusRes2(focusRes1);
254
255                //What are aftIndex and befIndex used for? How are they indexed?
256                //a.getAfpAftIndex()
257
258
259                String[][][] pdbAln = a.getPdbAln();
260                if( pdbAln != null) {
261                        for(int block = 0; block < a.getBlockNum(); block++) {
262                                String[] paln1 = pdbAln[block][0];
263                                pdbAln[block][0] = pdbAln[block][1];
264                                pdbAln[block][1] = paln1;
265                        }
266                }
267
268                int[][][] optAln = a.getOptAln();
269                if( optAln != null ) {
270                        for(int block = 0; block < a.getBlockNum(); block++) {
271                                int[] aln1 = optAln[block][0];
272                                optAln[block][0] = optAln[block][1];
273                                optAln[block][1] = aln1;
274                        }
275                }
276                a.setOptAln(optAln); // triggers invalidate()
277
278                Matrix distmat = a.getDistanceMatrix();
279                if(distmat != null)
280                        a.setDistanceMatrix(distmat.transpose());
281
282
283                // invert the rotation matrices
284                Matrix[] blockRotMat = a.getBlockRotationMatrix();
285                Atom[] shiftVec = a.getBlockShiftVector();
286                if( blockRotMat != null) {
287                        for(int block = 0; block < a.getBlockNum(); block++) {
288                                if(blockRotMat[block] != null) {
289                                        // if y=x*A+b, then x=y*inv(A)-b*inv(A)
290                                        blockRotMat[block] = blockRotMat[block].inverse();
291
292                                        Calc.rotate(shiftVec[block],blockRotMat[block]);
293                                        shiftVec[block] = Calc.invert(shiftVec[block]);
294                                }
295                        }
296                }
297
298                return a;
299        }
300
301        /**
302         * Takes as input an AFPChain where ca2 has been artificially duplicated.
303         * This raises the possibility that some residues of ca2 will appear in
304         * multiple AFPs. This method filters out duplicates and makes sure that
305         * all AFPs are numbered relative to the original ca2.
306         *
307         * <p>The current version chooses a CP site such that the length of the
308         * alignment is maximized.
309         *
310         * <p>This method does <i>not</i> update scores to reflect the filtered alignment.
311         * It <i>does</i> update the RMSD and superposition.
312         *
313         * @param afpChain The alignment between ca1 and ca2-ca2. Blindly assumes
314         *  that ca2 has been duplicated.
315         * @return A new AFPChain consisting of ca1 to ca2, with each residue in
316         *  at most 1 AFP.
317         * @throws StructureException
318         */
319        public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, Atom[] ca1, Atom[] ca2duplicated) throws StructureException {
320                return filterDuplicateAFPs(afpChain, ceCalc, ca1, ca2duplicated, null);
321        }
322        public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc,
323                        Atom[] ca1, Atom[] ca2duplicated, CECPParameters params) throws StructureException {
324                AFPChain newAFPChain = new AFPChain(afpChain);
325
326                if(params == null)
327                        params = new CECPParameters();
328
329                final int minCPlength = params.getMinCPLength();
330
331
332                int ca2len = afpChain.getCa2Length()/2;
333                newAFPChain.setCa2Length(ca2len);
334
335                // Fix optimal alignment
336                int[][][] optAln = afpChain.getOptAln();
337                int[] optLen = afpChain.getOptLen();
338                int alignLen = afpChain.getOptLength();
339                if (alignLen < 1) return newAFPChain;
340
341                assert(afpChain.getBlockNum() == 1); // Assume that CE returns just one block
342
343
344                // Determine the region where ca2 and ca2' overlap
345
346                // The bounds of the alignment wrt ca2-ca2'
347                int nStart = optAln[0][1][0]; //alignment N-terminal
348                int cEnd = optAln[0][1][alignLen-1]; // alignment C-terminal
349                // overlap is between nStart and cEnd
350
351
352                int firstRes = nStart; // start res number after trimming
353                int lastRes = nStart+ca2len;  // last res number after trimming
354                if(nStart >= ca2len || cEnd < ca2len) { // no circular permutation
355                        firstRes=nStart;
356                        lastRes=cEnd;
357                } else {
358                        // Rule: maximize the length of the alignment
359
360                        int overlapLength = cEnd+1 - nStart - ca2len;
361                        if(overlapLength <= 0) {
362                                // no overlap!
363
364                                CPRange minCP = calculateMinCP(optAln[0][1], alignLen, ca2len, minCPlength);
365
366                                firstRes=nStart;
367                                lastRes=cEnd;
368
369                                // Remove short blocks
370                                if(firstRes > minCP.n) {
371                                        firstRes = ca2len;
372
373                                        if(debug) {
374                                                System.out.format("Discarding n-terminal block as too " +
375                                                                "short (%d residues, needs %d)\n",
376                                                                minCP.mid, minCPlength);
377                                        }
378                                }
379
380                                if(lastRes < minCP.c) {
381                                        lastRes = ca2len-1;
382
383                                        if(debug) {
384                                                System.out.format("Discarding c-terminal block as too " +
385                                                                "short (%d residues, needs %d)\n",
386                                                                optLen[0] - minCP.mid, minCPlength);
387                                        }
388                                }
389
390                        }
391                        else {
392                                // overlap!
393
394                                CutPoint cp = calculateCutPoint(optAln[0][1], nStart, cEnd,
395                                                overlapLength, alignLen, minCPlength, ca2len, firstRes);
396
397                                // Adjust alignment length for trimming
398                                //alignLen -= cp.numResiduesCut; //TODO inaccurate
399
400                                firstRes = cp.firstRes;
401                                lastRes = cp.lastRes;
402
403                                //TODO Now have CP site, and could do a NxM alignment for further optimization.
404                                // For now, does not appear to be worth the 50% increase in time
405
406                                //TODO Bug: scores need to be recalculated
407                        }
408                }
409
410
411
412                // Fix numbering:
413                // First, split up the atoms into left and right blocks
414                List< ResiduePair > left = new ArrayList<ResiduePair>(); // residues from left of duplication
415                List< ResiduePair > right = new ArrayList<ResiduePair>(); // residues from right of duplication
416
417                for(int i=0;i<optLen[0];i++) {
418                        if( optAln[0][1][i] >= firstRes && optAln[0][1][i] <= lastRes ) { // not trimmed
419                                if(optAln[0][1][i] < ca2len) { // in first half of ca2
420                                        left.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]));
421                                }
422                                else {
423                                        right.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]-ca2len));
424                                }
425                        }
426                }
427                //assert(left.size()+right.size() == alignLen);
428                alignLen = 0;
429
430                // Now we don't care about left/right, so just call them "blocks"
431                List<List<ResiduePair>> blocks = new ArrayList<List<ResiduePair>>(2);
432                if( !left.isEmpty() ) {
433                        blocks.add(left);
434                        alignLen += left.size();
435                }
436                if( !right.isEmpty()) {
437                        blocks.add(right);
438                        alignLen += right.size();
439                }
440                left=null; right = null;
441
442                // Put the blocks back together into arrays for the AFPChain
443                int[][][] newAlign = new int[blocks.size()][][];
444                int[] blockLengths = new int[blocks.size()];
445                for(int blockNum = 0; blockNum < blocks.size(); blockNum++) {
446                        //Alignment
447                        List<ResiduePair> block = blocks.get(blockNum);
448                        newAlign[blockNum] = new int[2][block.size()];
449                        for(int i=0;i<block.size();i++) {
450                                ResiduePair pair = block.get(i);
451                                newAlign[blockNum][0][i] = pair.a;
452                                newAlign[blockNum][1][i] = pair.b;
453                        }
454
455                        // Block lengths
456                        blockLengths[blockNum] = block.size();
457                }
458                // Set Alignment
459                newAFPChain.setOptAln(newAlign);
460                newAFPChain.setOptLen(blockLengths );
461                newAFPChain.setOptLength(alignLen);
462                newAFPChain.setBlockNum(blocks.size());
463                newAFPChain.setBlockResSize(blockLengths.clone());
464                newAFPChain.setSequentialAlignment(blocks.size() == 1);
465
466                // TODO make the AFPSet consistent
467                // TODO lots more block properties & old AFP properties
468
469                // Recalculate superposition
470                Atom[] atoms1 = new Atom[alignLen];
471                Atom[] atoms2 = new Atom[alignLen];
472
473                int pos=0;
474                for(List<ResiduePair> block:blocks ) {
475                        for(ResiduePair pair:block) {
476                                atoms1[pos] = ca1[pair.a];
477                                // Clone residue to allow modification
478                                Atom atom2 = ca2duplicated[pair.b];
479                                Group g = (Group) atom2.getGroup().clone();
480                                atoms2[pos] = g.getAtom( atom2.getName() );
481                                pos++;
482                        }
483                }
484                assert(pos == alignLen);
485
486                // Sets the rotation matrix in ceCalc to the proper value
487                double rmsd = -1;
488                double tmScore = 0.;
489                double[] blockRMSDs = new double[blocks.size()];
490                Matrix[] blockRotationMatrices = new Matrix[blocks.size()];
491                Atom[] blockShifts = new Atom[blocks.size()];
492
493                if(alignLen>0) {
494                        // superimpose
495                        SVDSuperimposer svd = new SVDSuperimposer(atoms1, atoms2);
496
497                        Matrix matrix = svd.getRotation();
498                        Atom shift = svd.getTranslation();
499
500                        for( Atom a : atoms2 ) {
501                                Calc.rotate(a.getGroup(), matrix);
502                                Calc.shift(a, shift);
503                        }
504
505                        //and get overall rmsd
506                        rmsd = SVDSuperimposer.getRMS(atoms1, atoms2);
507                        tmScore = SVDSuperimposer.getTMScore(atoms1, atoms2, ca1.length, ca2len);
508
509                        // set all block rotations to the overall rotation
510                        // It's not well documented if this is the expected behavior, but
511                        // it seems to work.
512                        blockRotationMatrices[0] = matrix;
513                        blockShifts[0] = shift;
514                        blockRMSDs[0] = -1;
515
516                        for(int i=1;i<blocks.size();i++) {
517                                blockRMSDs[i] = -1; //TODO Recalculate for the FATCAT text format
518                                blockRotationMatrices[i] = (Matrix) blockRotationMatrices[0].clone();
519                                blockShifts[i] = (Atom) blockShifts[0].clone();
520                        }
521
522                }
523                newAFPChain.setOptRmsd(blockRMSDs);
524                newAFPChain.setBlockRmsd(blockRMSDs);
525                newAFPChain.setBlockRotationMatrix(blockRotationMatrices);
526                newAFPChain.setBlockShiftVector(blockShifts);
527                newAFPChain.setTotalRmsdOpt(rmsd);
528                newAFPChain.setTMScore( tmScore );
529
530                // Clean up remaining properties using the FatCat helper method
531                Atom[] ca2 = new Atom[ca2len];
532                System.arraycopy(ca2duplicated, 0, ca2, 0, ca2len);
533                AFPAlignmentDisplay.getAlign(newAFPChain, ca1, ca2duplicated);
534
535                return newAFPChain;
536        }
537
538        private static int[] countCtermResidues(int[] block, int blockLen,
539                        int cEnd, int overlapLength) {
540                int[] cTermResCount = new int[overlapLength+1]; // # res at or to the right of i within overlap
541                cTermResCount[overlapLength] = 0;
542                int alignPos = blockLen - 1;
543                for(int i=overlapLength-1;i>=0;i--) { // i starts at the c-term and increases to the left
544                        if(block[alignPos] == cEnd - overlapLength+1 + i) { // matches the aligned pair
545                                // the c-term contains the -ith overlapping residue
546                                cTermResCount[i] = cTermResCount[i+1]+1;
547                                alignPos--;
548                        } else {
549                                cTermResCount[i] = cTermResCount[i+1];
550                        }
551                }
552                return cTermResCount;
553        }
554
555        private static int[] countNtermResidues(int[] block, int nStart,
556                        int overlapLength) {
557                int[] nTermResCount = new int[overlapLength+1]; // increases monotonically
558                nTermResCount[0] = 0;
559                int alignPos = 0; // index of the next aligned pair
560
561                for(int i=1;i<=overlapLength;i++) {
562                        if(block[alignPos] == nStart + i-1 ) { // matches the aligned pair
563                                // the n-term contains the ith overlapping residue
564                                nTermResCount[i] = nTermResCount[i-1]+1;
565                                alignPos++;
566                        } else {
567                                nTermResCount[i] = nTermResCount[i-1];
568                        }
569                }
570                return nTermResCount;
571        }
572
573
574        /**
575         * A light class to store an alignment between two residues.
576         * @author Spencer Bliven
577         * @see #filterDuplicateAFPs()
578         */
579        private static class ResiduePair {
580                public int a;
581                public int b;
582                public ResiduePair(int a, int b) {
583                        this.a=a;
584                        this.b=b;
585                }
586                @Override
587                public String toString() {
588                        return a+":"+b;
589                }
590        }
591
592
593        /**
594         * Tiny wrapper for the disallowed regions of an alignment.
595         * @see CeCPMain#calculateMinCP(int[], int, int, int)
596         * @author Spencer Bliven
597         *
598         */
599        protected static class CPRange {
600                /**
601                 * last allowed n-term
602                 */
603                public int n;
604                /**
605                 * midpoint of the alignment
606                 */
607                public int mid;
608                /**
609                 * first allowed c-term
610                 */
611                public int c;
612        }
613
614        /**
615         * Finds the alignment index of the residues minCPlength before and after
616         * the duplication.
617         *
618         * @param block The permuted block being considered, generally optAln[0][1]
619         * @param blockLen The length of the block (in case extra memory was allocated in block)
620         * @param ca2len The length, in residues, of the protein specified by block
621         * @param minCPlength The minimum number of residues allowed for a CP
622         * @return a CPRange with the following components:
623         *  <dl><dt>n</dt><dd>Index into <code>block</code> of the residue such that
624         *      <code>minCPlength</code> residues remain to the end of <code>ca2len</code>,
625         *      or -1 if no residue fits that criterium.</dd>
626         *  <dt>mid</dt><dd>Index of the first residue higher than <code>ca2len</code>.</dd>
627         *  <dt>c</dt><dd>Index of <code>minCPlength</code>-th residue after ca2len,
628         *      or ca2len*2 if no residue fits that criterium.</dd>
629         *  </dl>
630         */
631        protected static CPRange calculateMinCP(int[] block, int blockLen, int ca2len, int minCPlength) {
632                CPRange range = new CPRange();
633
634                // Find the cut point within the alignment.
635                // Either returns the index i of the alignment such that block[i] == ca2len,
636                // or else returns -i-1 where block[i] is the first element > ca2len.
637                int middle = Arrays.binarySearch(block, ca2len);
638                if(middle < 0) {
639                        middle = -middle -1;
640                }
641                // Middle is now the first res after the duplication
642                range.mid = middle;
643
644                int minCPntermIndex = middle-minCPlength;
645                if(minCPntermIndex >= 0) {
646                        range.n = block[minCPntermIndex];
647                } else {
648                        range.n = -1;
649                }
650
651                int minCPctermIndex = middle+minCPlength-1;
652                if(minCPctermIndex < blockLen) {
653                        range.c = block[minCPctermIndex];
654                } else {
655                        range.c = ca2len*2;
656                }
657
658                // Stub:
659                // Best-case: assume all residues in the termini are aligned
660                //range.n = ca2len - minCPlength;
661                //range.c = ca2len + minCPlength-1;
662
663                return range;
664        }
665
666
667        private static class CutPoint {
668                public int numResiduesCut;
669                public int firstRes;
670                public int lastRes;
671        }
672
673        private static CutPoint calculateCutPoint(int[] block,int nStart, int cEnd,
674                        int overlapLength, int alignLen, int minCPlength, int ca2len, int firstRes) {
675
676                // We require at least minCPlength residues in a block.
677                //TODO calculate these explicitely based on the alignment
678
679                // The last valid n-term
680                CPRange minCP = calculateMinCP(block, alignLen, ca2len, minCPlength);
681                int minCPnterm = minCP.n;
682                // The first valid c-term
683                int minCPcterm = minCP.c;
684
685
686                // # res at or to the left of i within the overlap
687                int[] nTermResCount = countNtermResidues(block, nStart,
688                                overlapLength);
689
690                // Determine the position with the largest sum of lengths
691                int[] cTermResCount = countCtermResidues(block, alignLen,
692                                cEnd, overlapLength);
693
694                // Alignment length for a cut at the left of the overlap
695                int maxResCount=-1;
696                for(int i=0;i<=overlapLength;i++) { // i is the position of the CP within the overlap region
697                        // Calculate number of residues which remain after the CP
698                        int nRemain,cRemain;
699                        if(nStart+i <= minCPnterm) {
700                                nRemain = nTermResCount[overlapLength]-nTermResCount[i];
701                        } else {
702                                nRemain = 0;
703                        }
704                        if(cEnd-overlapLength+i >= minCPcterm) {
705                                cRemain = cTermResCount[0] - cTermResCount[i];
706                        } else {
707                                cRemain = 0;
708                        }
709
710                        // Look for the cut point which cuts off the minimum number of res
711                        if(nRemain + cRemain > maxResCount ) { // '>' biases towards keeping the n-term
712                                maxResCount = nRemain + cRemain;
713                                firstRes = nStart+ i;
714                        }
715                }
716
717                // Calculate the number of residues cut within the overlap
718                int numResiduesCut = nTermResCount[overlapLength]+cTermResCount[0]-maxResCount;
719
720                // Remove short blocks
721                if(firstRes > minCPnterm) {
722                        // update number of residues cut for those outside the overlap
723                        numResiduesCut += 0; //TODO
724
725                        firstRes = ca2len;
726                }
727
728                int lastRes = firstRes+ca2len-1;
729                if(lastRes < minCPcterm) {
730                        // update number of residues cut for those outside the overlap
731                        numResiduesCut += 0; //TODO
732
733                        lastRes = ca2len-1;
734                }
735
736
737                CutPoint cp = new CutPoint();
738                cp.firstRes=firstRes;
739                cp.numResiduesCut = numResiduesCut;
740                cp.lastRes = lastRes;
741
742                if(debug) {
743                        System.out.format("Found a CP at residue %d. Trimming %d aligned residues from %d-%d of block 0 and %d-%d of block 1.\n",
744                                        firstRes,cp.numResiduesCut,nStart,firstRes-1,firstRes, cEnd-ca2len);
745                }
746
747                return cp;
748        }
749
750
751        // try showing a GUI
752        // requires additional dependencies biojava-structure-gui and JmolApplet
753        // TODO dmyersturnbull: This should probably be in structure-gui
754        @SuppressWarnings("unused")
755        private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException {
756                Atom[] ca1clone = StructureTools.cloneAtomArray(ca1);
757                Atom[] ca2clone = StructureTools.cloneAtomArray(ca2);
758                if (! GuiWrapper.isGuiModuleInstalled()) {
759                        System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!");
760                        // display alignment in console
761                        System.out.println(afpChain.toCE(ca1clone, ca2clone));
762                } else {
763                        Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone);
764                        GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol);
765                }
766        }
767}