001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.internal; 022 023import java.util.ArrayList; 024import java.util.HashMap; 025import java.util.Iterator; 026import java.util.LinkedList; 027import java.util.List; 028import java.util.Map; 029import java.util.NavigableSet; 030import java.util.TreeSet; 031 032import org.biojava.nbio.structure.Atom; 033import org.biojava.nbio.structure.StructureException; 034import org.biojava.nbio.structure.align.model.AFPChain; 035import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 036import org.biojava.nbio.structure.align.util.AlignmentTools; 037import org.biojava.nbio.structure.symmetry.utils.SymmetryTools; 038 039/** 040 * Creates a refined alignment with the CE-Symm alternative self-alignment. 041 * Needs the order of symmetry and assumes that the last repeat aligns 042 * with the first, being thus a CLOSE symmetry. 043 * 044 * @author Spencer Bliven 045 * @author Aleix Lafita 046 * @since 4.2.0 047 * 048 */ 049public class SequenceFunctionRefiner implements SymmetryRefiner { 050 051 @Override 052 public MultipleAlignment refine(AFPChain selfAlignment, Atom[] atoms, 053 int order) throws RefinerFailedException, StructureException { 054 055 if (order < 2) throw new RefinerFailedException( 056 "Symmetry not found in the structure: order < 2."); 057 058 AFPChain afp = refineSymmetry(selfAlignment, atoms, atoms, order); 059 return SymmetryTools.fromAFP(afp, atoms); 060 } 061 062 /** 063 * Refines a CE-Symm alignment so that it is perfectly symmetric. 064 * 065 * The resulting alignment will have a one-to-one correspondance between 066 * aligned residues of each symmetric part. 067 * 068 * @param afpChain Input alignment from CE-Symm 069 * @param k Symmetry order. This can be guessed by {@link CeSymm#getSymmetryOrder(AFPChain)} 070 * @return The refined alignment 071 * @throws StructureException 072 * @throws RefinerFailedException 073 */ 074 public static AFPChain refineSymmetry(AFPChain afpChain, Atom[] ca1, Atom[] ca2, int k) 075 throws StructureException, RefinerFailedException { 076 077 // Transform alignment to a Map 078 Map<Integer, Integer> alignment = AlignmentTools.alignmentAsMap(afpChain); 079 080 // Refine the alignment Map 081 Map<Integer, Integer> refined = refineSymmetry(alignment, k); 082 if (refined.size() < 1) 083 throw new RefinerFailedException("Refiner returned empty alignment"); 084 085 //Substitute and partition the alignment 086 try { 087 AFPChain refinedAFP = AlignmentTools.replaceOptAln(afpChain, ca1, ca2, refined); 088 refinedAFP = partitionAFPchain(refinedAFP, ca1, ca2, k); 089 if (refinedAFP.getOptLength() < 1) 090 throw new IndexOutOfBoundsException("Refined alignment is empty."); 091 return refinedAFP; 092 } catch (IndexOutOfBoundsException e){ 093 // This Exception is thrown when the refined alignment is not consistent 094 throw new RefinerFailedException("Refiner failure: non-consistent result", e); 095 } 096 } 097 098 /** 099 * Refines a CE-Symm alignment so that it is perfectly symmetric. 100 * 101 * The resulting alignment will have a one-to-one correspondance between 102 * aligned residues of each symmetric part. 103 * @param alignment The input alignment, as a map. This will be modified. 104 * @param k Symmetry order. This can be guessed by {@link CeSymm#getSymmetryOrder(AFPChain)} 105 * @return A modified map with the refined alignment 106 * @throws StructureException 107 */ 108 public static Map<Integer, Integer> refineSymmetry(Map<Integer, Integer> alignment,int k) throws StructureException { 109 110 // Store scores 111 Map<Integer, Double> scores = null; 112 scores = initializeScores(alignment,scores, k); 113 114 // Store eligible residues 115 // Eligible if: 116 // 1. score(x)>0 117 // 2. f^K-1(x) is defined 118 // 3. score(f^K-1(x))>0 119 120 TreeSet<Integer> forwardLoops = new TreeSet<Integer>(); 121 TreeSet<Integer> backwardLoops = new TreeSet<Integer>(); 122 123 124 List<Integer> eligible = null; 125 eligible = initializeEligible(alignment,scores,eligible,k,forwardLoops,backwardLoops); 126 127 /* For future heap implementation 128 Comparator<Integer> scoreComparator = new Comparator<Integer>() { 129 @Override public int compare(Integer o1, Integer o2) { 130 if(scores.containsKey(o1)) { 131 if(scores.containsKey(o2)) { 132 // If both have defined scores, compare the scores 133 return scores.get(o1).compareTo(scores.get(o2)); 134 } else { 135 // o2 has infinite score, so o1 < o2 136 return -1; 137 } 138 } else { 139 //o1 has infinite score 140 if(scores.containsKey(o2)) { 141 // o1 > o2 142 return 1; 143 } else { 144 //both undefined 145 return 0; 146 } 147 } 148 } 149 }; 150 PriorityQueue<Integer> heap = new PriorityQueue<Integer>(alignment.size(), scoreComparator); 151 */ 152 //int step = 0; 153 while (!eligible.isEmpty()) { 154 //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); 155 156 // Find eligible residue with lowest scores 157 Integer bestRes = null; 158 double bestResScore = Double.POSITIVE_INFINITY; 159 for(Integer res : eligible) { 160 Double score = scores.get(res); 161 if (score != null && score < bestResScore) { 162 bestResScore = score; 163 bestRes = res; 164 } 165 } 166 167 // Find f^k-1(bestRes) 168 Integer resK1 = bestRes; 169 for (int i = 0; i < k - 1; i++) { 170 assert (resK1 != null); 171 resK1 = alignment.get(resK1); 172 173 // Update scores 174 scores.put(resK1, 0.0); 175 } 176 scores.put(bestRes, 0.0); 177 178 // Modify alignment 179 alignment.put(resK1, bestRes); 180 181 scores = initializeScores(alignment, scores, k); 182 183 Map<Integer, Double> virginScores = initializeScores(alignment, null, k); 184 if (scores.size() != virginScores.size()) { 185 System.out.println("Size missmatch"); 186 } else { 187 for (Integer key : scores.keySet()) { 188 if (!virginScores.containsKey(key) || !scores.get(key).equals(virginScores.get(key))) { 189 System.out.format("Mismatch %d -> %f/%f%n", key, scores.get(key), virginScores.get(key)); 190 } 191 } 192 } 193 194 // Update eligible 195 // TODO only update residues which could become ineligible 196 eligible = initializeEligible(alignment, scores, eligible, k, forwardLoops, backwardLoops); 197 198 // System.out.format("Modifying %d -> %d. %d now eligible.%n", resK1,bestRes,eligible.size()); 199 } 200 //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); 201 202 // Remove remaining edges 203 Iterator<Integer> alignmentIt = alignment.keySet().iterator(); 204 while (alignmentIt.hasNext()) { 205 Integer res = alignmentIt.next(); 206 Double score = scores.get(res); 207 if (score == null || score > 0.0) { 208 alignmentIt.remove(); 209 } 210 } 211 //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); 212 213 return alignment; 214 } 215 216 /** 217 * Helper method to initialize eligible residues. 218 * 219 * Eligible if: 220 * 1. score(x)>0 221 * 2. f^K-1(x) is defined 222 * 3. score(f^K-1(x))>0 223 * 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) 224 * @param alignment The alignment with respect to which to calculate eligibility 225 * @param scores An up-to-date map from residues to their scores 226 * @param eligible Starting list of eligible residues. If null will be generated. 227 * @param k 228 * @param backwardLoops 229 * @param forwardLoops 230 * @return eligible after modification 231 */ 232 private static List<Integer> initializeEligible(Map<Integer, Integer> alignment, 233 Map<Integer, Double> scores, List<Integer> eligible, int k, NavigableSet<Integer> forwardLoops, NavigableSet<Integer> backwardLoops) { 234 // Eligible if: 235 // 1. score(x)>0 236 // 2. f^K-1(x) is defined 237 // 3. score(f^K-1(x))>0 238 // 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) 239 // 5. Not in a loop of length less than k 240 241 // Assume all residues are eligible to start 242 if(eligible == null) { 243 eligible = new LinkedList<Integer>(alignment.keySet()); 244 } 245 246 // Precalculate f^K-1(x) 247 // Map<Integer, Integer> alignK1 = AlignmentTools.applyAlignment(alignment, k-1); 248 Map<Integer, Integer> alignK1 = applyAlignmentAndCheckCycles(alignment, k - 1, eligible); 249 250 // Remove ineligible residues 251 Iterator<Integer> eligibleIt = eligible.iterator(); 252 while(eligibleIt.hasNext()) { 253 Integer res = eligibleIt.next(); 254 255 // 2. f^K-1(x) is defined 256 if(!alignK1.containsKey(res)) { 257 eligibleIt.remove(); 258 continue; 259 } 260 Integer k1 = alignK1.get(res); 261 if(k1 == null) { 262 eligibleIt.remove(); 263 continue; 264 } 265 266 // 1. score(x)>0 267 Double score = scores.get(res); 268 if(score == null || score <= 0.0) { 269 eligibleIt.remove(); 270 271 // res is in a loop. Add it to the proper set 272 if(res <= alignment.get(res)) { 273 //forward 274 forwardLoops.add(res); 275 } else { 276 //backward 277 backwardLoops.add(res); 278 } 279 280 continue; 281 } 282 // 3. score(f^K-1(x))>0 283 Double scoreK1 = scores.get(k1); 284 if(scoreK1 == null || scoreK1 <= 0.0) { 285 eligibleIt.remove(); 286 continue; 287 } 288 } 289 290 291 // Now that loops are up-to-date, check for loop crossings 292 eligibleIt = eligible.iterator(); 293 while(eligibleIt.hasNext()) { 294 Integer res = eligibleIt.next(); 295 296 //4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) 297 //Test equivalent: All loop edges should be properly ordered wrt edge f^k-1(x) -> x 298 299 Integer src = alignK1.get(res); 300 301 if( src < res ) { 302 //forward 303 // get interval [a,b) containing res 304 Integer a = forwardLoops.floor(src); 305 Integer b = forwardLoops.higher(src); 306 307 // Ineligible unless f(a) < res < f(b) 308 if(a != null && alignment.get(a) > res ) { 309 eligibleIt.remove(); 310 continue; 311 } 312 if(b != null && alignment.get(b) < res ) { 313 eligibleIt.remove(); 314 continue; 315 } 316 } 317 } 318 319 return eligible; 320 } 321 322 323 /** 324 * Like {@link AlignmentTools#applyAlignment(Map, int)}, returns a map of k applications of alignmentMap. However, 325 * it also sets loops of size less than k as ineligible. 326 * 327 * @param alignmentMap 328 * f(x) 329 * @param k 330 * @param eligible 331 * Eligible residues. Residues from small cycles are removed. 332 * @return f^k(x) 333 */ 334 private static Map<Integer, Integer> applyAlignmentAndCheckCycles(Map<Integer, Integer> alignmentMap, int k, List<Integer> eligible) { 335 336 // Convert to lists to establish a fixed order (avoid concurrent modification) 337 List<Integer> preimage = new ArrayList<Integer>(alignmentMap.keySet()); // currently unmodified 338 List<Integer> image = new ArrayList<Integer>(preimage); 339 340 for (int n = 1; n <= k; n++) { 341 // apply alignment 342 for (int i = 0; i < image.size(); i++) { 343 final Integer pre = image.get(i); 344 final Integer post = (pre == null ? null : alignmentMap.get(pre)); 345 image.set(i, post); 346 347 // Make cycles ineligible 348 if (post != null && post.equals(preimage.get(i))) { 349 eligible.remove(preimage.get(i)); // Could be O(n) with List impl 350 } 351 } 352 } 353 354 Map<Integer, Integer> imageMap = new HashMap<Integer, Integer>(alignmentMap.size()); 355 356 // now populate with actual values 357 for (int i = 0; i < preimage.size(); i++) { 358 Integer pre = preimage.get(i); 359 Integer postK = image.get(i); 360 imageMap.put(pre, postK); 361 } 362 return imageMap; 363 } 364 365 /** 366 * Calculates all scores for an alignment 367 * @param alignment 368 * @param scores A mapping from residues to scores, which will be updated or 369 * created if null 370 * @return scores 371 */ 372 private static Map<Integer, Double> initializeScores(Map<Integer, Integer> alignment, 373 Map<Integer, Double> scores, int k) { 374 if(scores == null) { 375 scores = new HashMap<Integer, Double>(alignment.size()); 376 } else { 377 scores.clear(); 378 } 379 Map<Integer,Integer> alignK = AlignmentTools.applyAlignment(alignment, k); 380 381 // calculate input range 382 int maxPre = Integer.MIN_VALUE; 383 int minPre = Integer.MAX_VALUE; 384 for(Integer pre : alignment.keySet()) { 385 if(pre>maxPre) maxPre = pre; 386 if(pre<minPre) minPre = pre; 387 } 388 389 for(Integer pre : alignment.keySet()) { 390 Integer image = alignK.get(pre); 391 392 // Use the absolute error score, |x - f^k(x)| 393 double score = scoreAbsError(pre,image,minPre,maxPre); 394 scores.put(pre, score); 395 } 396 return scores; 397 } 398 399 400 401 /** 402 * Calculate the score for a residue, specifically the Absolute Error 403 * score(x) = |x-f^k(x)| 404 * 405 * Also includes a small bias based on residue number, for uniqueness.. 406 * @param pre x 407 * @param image f^k(x) 408 * @param minPre lowest possible residue number 409 * @param maxPre highest possible residue number 410 * @return 411 */ 412 private static double scoreAbsError(Integer pre, Integer image,int minPre,int maxPre) { 413 // Use the absolute error score, |x - f^k(x)| 414 double error; 415 if(image == null) { 416 error = Double.POSITIVE_INFINITY; 417 } else { 418 error = Math.abs(pre - image); 419 } 420 421 //TODO favor lower degree-in 422 423 // Add fractional portion relative to sequence position, for uniqueness 424 if(error > 0) 425 error += (double)(pre-minPre)/(1+maxPre-minPre); 426 427 return error; 428 } 429 430 /** 431 * Partitions an afpChain alignment into order blocks of aligned residues. 432 * 433 * @param afpChain 434 * @param ca1 435 * @param ca2 436 * @param order 437 * @return 438 * @throws StructureException 439 */ 440 private static AFPChain partitionAFPchain(AFPChain afpChain, 441 Atom[] ca1, Atom[] ca2, int order) throws StructureException{ 442 443 int[][][] newAlgn = new int[order][][]; 444 int repeatLen = afpChain.getOptLength()/order; 445 446 //Extract all the residues considered in the first chain of the alignment 447 List<Integer> alignedRes = new ArrayList<Integer>(); 448 for (int su=0; su<afpChain.getBlockNum(); su++){ 449 for (int i=0; i<afpChain.getOptLen()[su]; i++){ 450 alignedRes.add(afpChain.getOptAln()[su][0][i]); 451 } 452 } 453 454 //Build the new alignment 455 for (int su=0; su<order; su++){ 456 newAlgn[su] = new int[2][]; 457 newAlgn[su][0] = new int[repeatLen]; 458 newAlgn[su][1] = new int[repeatLen]; 459 for (int i=0; i<repeatLen; i++){ 460 newAlgn[su][0][i] = alignedRes.get(repeatLen*su+i); 461 newAlgn[su][1][i] = alignedRes.get( 462 (repeatLen*(su+1)+i)%alignedRes.size()); 463 } 464 } 465 466 return AlignmentTools.replaceOptAln(newAlgn, afpChain, ca1, ca2); 467 } 468}