001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.symmetry.internal;
022
023import java.util.ArrayList;
024import java.util.HashMap;
025import java.util.Iterator;
026import java.util.LinkedList;
027import java.util.List;
028import java.util.Map;
029import java.util.NavigableSet;
030import java.util.TreeSet;
031
032import org.biojava.nbio.structure.Atom;
033import org.biojava.nbio.structure.StructureException;
034import org.biojava.nbio.structure.align.model.AFPChain;
035import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
036import org.biojava.nbio.structure.align.util.AlignmentTools;
037import org.biojava.nbio.structure.symmetry.utils.SymmetryTools;
038
039/**
040 * Creates a refined alignment with the CE-Symm alternative self-alignment.
041 * Needs the order of symmetry and assumes that the last repeat aligns
042 * with the first, being thus a CLOSE symmetry.
043 *
044 * @author Spencer Bliven
045 * @author Aleix Lafita
046 * @since 4.2.0
047 *
048 */
049public class SequenceFunctionRefiner implements SymmetryRefiner {
050
051        @Override
052        public MultipleAlignment refine(AFPChain selfAlignment, Atom[] atoms,
053                        int order) throws RefinerFailedException, StructureException {
054
055                if (order < 2)  throw new RefinerFailedException(
056                                "Symmetry not found in the structure: order < 2.");
057
058                AFPChain afp = refineSymmetry(selfAlignment, atoms, atoms, order);
059                return SymmetryTools.fromAFP(afp, atoms);
060        }
061
062        /**
063         * Refines a CE-Symm alignment so that it is perfectly symmetric.
064         * <p>
065         * The resulting alignment will have a one-to-one correspondance between
066         * aligned residues of each symmetric part.
067         *
068         * @param afpChain Input alignment from CE-Symm
069         * @param k Symmetry order. This can be guessed by {@link AlignmentTools#getSymmetryOrder(Map, Map, int, float)}
070         * @return The refined alignment
071         * @throws StructureException
072         * @throws RefinerFailedException
073         */
074        public static AFPChain refineSymmetry(AFPChain afpChain, Atom[] ca1, Atom[] ca2, int k)
075                        throws StructureException, RefinerFailedException {
076
077                // Transform alignment to a Map
078                Map<Integer, Integer> alignment = AlignmentTools.alignmentAsMap(afpChain);
079
080                // Refine the alignment Map
081                Map<Integer, Integer> refined = refineSymmetry(alignment, k);
082                if (refined.size() < 1)
083                        throw new RefinerFailedException("Refiner returned empty alignment");
084
085                //Substitute and partition the alignment
086                try {
087                        AFPChain refinedAFP = AlignmentTools.replaceOptAln(afpChain, ca1, ca2, refined);
088                        refinedAFP = partitionAFPchain(refinedAFP, ca1, ca2, k);
089                        if (refinedAFP.getOptLength() < 1)
090                                throw new IndexOutOfBoundsException("Refined alignment is empty.");
091                        return refinedAFP;
092                } catch (IndexOutOfBoundsException e){
093                        // This Exception is thrown when the refined alignment is not consistent
094                        throw new RefinerFailedException("Refiner failure: non-consistent result", e);
095                }
096        }
097
098        /**
099         * Refines a CE-Symm alignment so that it is perfectly symmetric.
100         * <p>
101         * The resulting alignment will have a one-to-one correspondance between
102         * aligned residues of each symmetric part.
103         * @param alignment The input alignment, as a map. This will be modified.
104         * @param k Symmetry order. This can be guessed by {@link AlignmentTools#getSymmetryOrder(Map, Map, int, float)}
105         * @return A modified map with the refined alignment
106         * @throws StructureException
107         */
108        public static Map<Integer, Integer> refineSymmetry(Map<Integer, Integer> alignment,int k) throws StructureException {
109
110                // Store scores
111                Map<Integer, Double> scores = null;
112                scores = initializeScores(alignment,scores, k);
113
114                // Store eligible residues
115                // Eligible if:
116                //  1. score(x)>0
117                //  2. f^K-1(x) is defined
118                //      3. score(f^K-1(x))>0
119
120                TreeSet<Integer> forwardLoops = new TreeSet<>();
121                TreeSet<Integer> backwardLoops = new TreeSet<>();
122
123
124                List<Integer> eligible = null;
125                eligible = initializeEligible(alignment,scores,eligible,k,forwardLoops,backwardLoops);
126
127                /* For future heap implementation
128                Comparator<Integer> scoreComparator = new Comparator<Integer>() {
129                        @Override public int compare(Integer o1, Integer o2) {
130                                if(scores.containsKey(o1)) {
131                                        if(scores.containsKey(o2)) {
132                                                // If both have defined scores, compare the scores
133                                                return scores.get(o1).compareTo(scores.get(o2));
134                                        } else {
135                                                // o2 has infinite score, so o1 < o2
136                                                return -1;
137                                        }
138                                } else {
139                                        //o1 has infinite score
140                                        if(scores.containsKey(o2)) {
141                                                // o1 > o2
142                                                return 1;
143                                        } else {
144                                                //both undefined
145                                                return 0;
146                                        }
147                                }
148                        }
149                };
150                PriorityQueue<Integer> heap = new PriorityQueue<Integer>(alignment.size(), scoreComparator);
151                 */
152                //int step = 0;
153                while (!eligible.isEmpty()) {
154                        //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
155
156                        // Find eligible residue with lowest scores
157                        Integer bestRes = null;
158                        double bestResScore = Double.POSITIVE_INFINITY;
159                        for(Integer res : eligible) {
160                                Double score = scores.get(res);
161                                if (score != null && score < bestResScore) {
162                                        bestResScore = score;
163                                        bestRes = res;
164                                }
165                        }
166
167                        // Find f^k-1(bestRes)
168                        Integer resK1 = bestRes;
169                        for (int i = 0; i < k - 1; i++) {
170                                assert (resK1 != null);
171                                resK1 = alignment.get(resK1);
172
173                                // Update scores
174                                scores.put(resK1, 0.0);
175                        }
176                        scores.put(bestRes, 0.0);
177
178                        // Modify alignment
179                        alignment.put(resK1, bestRes);
180
181                        scores = initializeScores(alignment, scores, k);
182
183                        Map<Integer, Double> virginScores = initializeScores(alignment, null, k);
184                        if (scores.size() != virginScores.size()) {
185                                System.out.println("Size missmatch");
186                        } else {
187                                for (Integer key : scores.keySet()) {
188                                        if (!virginScores.containsKey(key) || !scores.get(key).equals(virginScores.get(key))) {
189                                                System.out.format("Mismatch %d -> %f/%f%n", key, scores.get(key), virginScores.get(key));
190                                        }
191                                }
192                        }
193
194                        // Update eligible
195                        // TODO only update residues which could become ineligible
196                        eligible = initializeEligible(alignment, scores, eligible, k, forwardLoops, backwardLoops);
197
198                        // System.out.format("Modifying %d -> %d. %d now eligible.%n", resK1,bestRes,eligible.size());
199                }
200                //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
201
202                // Remove remaining edges
203                Iterator<Integer> alignmentIt = alignment.keySet().iterator();
204                while (alignmentIt.hasNext()) {
205                        Integer res = alignmentIt.next();
206                        Double score = scores.get(res);
207                        if (score == null || score > 0.0) {
208                                alignmentIt.remove();
209                        }
210                }
211                //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
212
213                return alignment;
214        }
215
216        /**
217         * Helper method to initialize eligible residues.
218         *
219         * Eligible if:
220         *  1. score(x)>0
221         *  2. f^K-1(x) is defined
222         *  3. score(f^K-1(x))>0
223         *  4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
224         * @param alignment The alignment with respect to which to calculate eligibility
225         * @param scores An up-to-date map from residues to their scores
226         * @param eligible Starting list of eligible residues. If null will be generated.
227         * @param k
228         * @param backwardLoops
229         * @param forwardLoops
230         * @return eligible after modification
231         */
232        private static List<Integer> initializeEligible(Map<Integer, Integer> alignment,
233                        Map<Integer, Double> scores, List<Integer> eligible, int k, NavigableSet<Integer> forwardLoops, NavigableSet<Integer> backwardLoops) {
234                // Eligible if:
235                // 1. score(x)>0
236                // 2. f^K-1(x) is defined
237                // 3. score(f^K-1(x))>0
238                // 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
239                // 5. Not in a loop of length less than k
240
241                // Assume all residues are eligible to start
242                if(eligible == null) {
243                        eligible = new LinkedList<>(alignment.keySet());
244                }
245
246                // Precalculate f^K-1(x)
247                // Map<Integer, Integer> alignK1 = AlignmentTools.applyAlignment(alignment, k-1);
248                Map<Integer, Integer> alignK1 = applyAlignmentAndCheckCycles(alignment, k - 1, eligible);
249
250                // Remove ineligible residues
251                Iterator<Integer> eligibleIt = eligible.iterator();
252                while(eligibleIt.hasNext()) {
253                        Integer res = eligibleIt.next();
254
255                        //  2. f^K-1(x) is defined
256                        if(!alignK1.containsKey(res)) {
257                                eligibleIt.remove();
258                                continue;
259                        }
260                        Integer k1 = alignK1.get(res);
261                        if(k1 == null) {
262                                eligibleIt.remove();
263                                continue;
264                        }
265
266                        //  1. score(x)>0
267                        Double score = scores.get(res);
268                        if(score == null || score <= 0.0) {
269                                eligibleIt.remove();
270
271                                // res is in a loop. Add it to the proper set
272                                if(res <= alignment.get(res)) {
273                                        //forward
274                                        forwardLoops.add(res);
275                                } else {
276                                        //backward
277                                        backwardLoops.add(res);
278                                }
279
280                                continue;
281                        }
282                        //      3. score(f^K-1(x))>0
283                        Double scoreK1 = scores.get(k1);
284                        if(scoreK1 == null || scoreK1 <= 0.0) {
285                                eligibleIt.remove();
286                                continue;
287                        }
288                }
289
290
291                // Now that loops are up-to-date, check for loop crossings
292                eligibleIt = eligible.iterator();
293                while(eligibleIt.hasNext()) {
294                        Integer res = eligibleIt.next();
295
296                        //4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
297                        //Test equivalent: All loop edges should be properly ordered wrt edge f^k-1(x) -> x
298
299                        Integer src = alignK1.get(res);
300
301                        if( src < res  ) {
302                                //forward
303                                // get interval [a,b) containing res
304                                Integer a = forwardLoops.floor(src);
305                                Integer b = forwardLoops.higher(src);
306
307                                // Ineligible unless f(a) < res < f(b)
308                                if(a != null && alignment.get(a) > res ) {
309                                        eligibleIt.remove();
310                                        continue;
311                                }
312                                if(b != null && alignment.get(b) < res ) {
313                                        eligibleIt.remove();
314                                        continue;
315                                }
316                        }
317                }
318
319                return eligible;
320        }
321
322
323        /**
324         * Like {@link AlignmentTools#applyAlignment(Map, int)}, returns a map of k applications of alignmentMap. However,
325         * it also sets loops of size less than k as ineligible.
326         *
327         * @param alignmentMap
328         *            f(x)
329         * @param k
330         * @param eligible
331         *            Eligible residues. Residues from small cycles are removed.
332         * @return f^k(x)
333         */
334        private static Map<Integer, Integer> applyAlignmentAndCheckCycles(Map<Integer, Integer> alignmentMap, int k, List<Integer> eligible) {
335
336                // Convert to lists to establish a fixed order (avoid concurrent modification)
337                List<Integer> preimage = new ArrayList<>(alignmentMap.keySet()); // currently unmodified
338                List<Integer> image = new ArrayList<>(preimage);
339
340                for (int n = 1; n <= k; n++) {
341                        // apply alignment
342                        for (int i = 0; i < image.size(); i++) {
343                                final Integer pre = image.get(i);
344                                final Integer post = (pre == null ? null : alignmentMap.get(pre));
345                                image.set(i, post);
346
347                                // Make cycles ineligible
348                                if (post != null && post.equals(preimage.get(i))) {
349                                        eligible.remove(preimage.get(i)); // Could be O(n) with List impl
350                                }
351                        }
352                }
353
354                Map<Integer, Integer> imageMap = new HashMap<>(alignmentMap.size());
355
356                // now populate with actual values
357                for (int i = 0; i < preimage.size(); i++) {
358                        Integer pre = preimage.get(i);
359                        Integer postK = image.get(i);
360                        imageMap.put(pre, postK);
361                }
362                return imageMap;
363        }
364
365        /**
366         * Calculates all scores for an alignment
367         * @param alignment
368         * @param scores A mapping from residues to scores, which will be updated or
369         *      created if null
370         * @return scores
371         */
372        private static Map<Integer, Double> initializeScores(Map<Integer, Integer> alignment,
373                        Map<Integer, Double> scores, int k) {
374                if(scores == null) {
375                        scores = new HashMap<>(alignment.size());
376                } else {
377                        scores.clear();
378                }
379                Map<Integer,Integer> alignK = AlignmentTools.applyAlignment(alignment, k);
380
381                // calculate input range
382                int maxPre = Integer.MIN_VALUE;
383                int minPre = Integer.MAX_VALUE;
384                for(Integer pre : alignment.keySet()) {
385                        if(pre>maxPre) maxPre = pre;
386                        if(pre<minPre) minPre = pre;
387                }
388
389                for(Integer pre : alignment.keySet()) {
390                        Integer image = alignK.get(pre);
391
392                        // Use the absolute error score, |x - f^k(x)|
393                        double score = scoreAbsError(pre,image,minPre,maxPre);
394                        scores.put(pre, score);
395                }
396                return scores;
397        }
398
399
400
401        /**
402         * Calculate the score for a residue, specifically the Absolute Error
403         *      score(x) = |x-f^k(x)|
404         *
405         * Also includes a small bias based on residue number, for uniqueness..
406         * @param pre x
407         * @param image f^k(x)
408         * @param minPre lowest possible residue number
409         * @param maxPre highest possible residue number
410         * @return
411         */
412        private static double scoreAbsError(Integer pre, Integer image,int minPre,int maxPre) {
413                // Use the absolute error score, |x - f^k(x)|
414                double error;
415                if(image == null) {
416                        error = Double.POSITIVE_INFINITY;
417                } else {
418                        error = Math.abs(pre - image);
419                }
420
421                //TODO favor lower degree-in
422
423                // Add fractional portion relative to sequence position, for uniqueness
424                if(error > 0)
425                        error += (double)(pre-minPre)/(1+maxPre-minPre);
426
427                return error;
428        }
429
430        /**
431         *  Partitions an afpChain alignment into order blocks of aligned residues.
432         *
433         * @param afpChain
434         * @param ca1
435         * @param ca2
436         * @param order
437         * @return
438         * @throws StructureException
439         */
440        private static AFPChain partitionAFPchain(AFPChain afpChain,
441                        Atom[] ca1, Atom[] ca2, int order) throws StructureException{
442
443                int[][][] newAlgn = new int[order][][];
444                int repeatLen = afpChain.getOptLength()/order;
445
446                //Extract all the residues considered in the first chain of the alignment
447                List<Integer> alignedRes = new ArrayList<>();
448                for (int su=0; su<afpChain.getBlockNum(); su++){
449                        for (int i=0; i<afpChain.getOptLen()[su]; i++){
450                                alignedRes.add(afpChain.getOptAln()[su][0][i]);
451                        }
452                }
453
454                //Build the new alignment
455                for (int su=0; su<order; su++){
456                        newAlgn[su] = new int[2][];
457                        newAlgn[su][0] = new  int[repeatLen];
458                        newAlgn[su][1] = new  int[repeatLen];
459                        for (int i=0; i<repeatLen; i++){
460                                newAlgn[su][0][i] = alignedRes.get(repeatLen*su+i);
461                                newAlgn[su][1][i] = alignedRes.get(
462                                                (repeatLen*(su+1)+i)%alignedRes.size());
463                        }
464                }
465
466                return AlignmentTools.replaceOptAln(newAlgn, afpChain, ca1, ca2);
467        }
468}