001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.internal; 022 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.Collections; 026import java.util.List; 027import java.util.Set; 028 029import javax.vecmath.Matrix4d; 030 031import org.biojava.nbio.structure.Atom; 032import org.biojava.nbio.structure.ResidueNumber; 033import org.biojava.nbio.structure.StructureException; 034import org.biojava.nbio.structure.StructureIdentifier; 035import org.biojava.nbio.structure.align.multiple.Block; 036import org.biojava.nbio.structure.align.multiple.BlockImpl; 037import org.biojava.nbio.structure.align.multiple.BlockSet; 038import org.biojava.nbio.structure.align.multiple.BlockSetImpl; 039import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 040import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; 041import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; 042import org.biojava.nbio.structure.secstruc.SecStrucElement; 043import org.biojava.nbio.structure.secstruc.SecStrucTools; 044import org.biojava.nbio.structure.secstruc.SecStrucType; 045import org.biojava.nbio.structure.symmetry.internal.CESymmParameters.RefineMethod; 046import org.biojava.nbio.structure.symmetry.internal.CESymmParameters.SymmetryType; 047import org.biojava.nbio.structure.symmetry.utils.SymmetryTools; 048import org.jgrapht.UndirectedGraph; 049import org.jgrapht.alg.ConnectivityInspector; 050import org.jgrapht.graph.DefaultEdge; 051import org.jgrapht.graph.SimpleGraph; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Iterative version of CeSymm that aims at identifying all symmetry axis of a 057 * structure. 058 * <p> 059 * Works in the following way: 060 * <ul> 061 * <li>Run CeSymm on the original structure. 062 * <li>Calculate the symmetric unit boundaries. 063 * <li>Run CeSymm on one of the symmetric units to find further symmetries. 064 * <li>Repeat the last two steps until no more significant results are found. 065 * <li>Map back all residues in a multiple alignment of the repeats. 066 * </ul> 067 * </li> 068 * 069 * @author Aleix Lafita 070 * @since 4.1.1 071 * 072 */ 073public class CeSymmIterative { 074 075 private static final Logger logger = LoggerFactory 076 .getLogger(CeSymmIterative.class); 077 078 private CESymmParameters params; 079 private UndirectedGraph<Integer, DefaultEdge> alignGraph; // cumulative 080 private List<CeSymmResult> levels; // symmetry at each level 081 082 /** 083 * For the iterative algorithm to work properly the refinement and 084 * optimization options should be turned on, because the alignment has to be 085 * consistent at every recursive step. 086 * 087 * @param param 088 * CeSymm parameters, make sure they are cloned 089 */ 090 public CeSymmIterative(CESymmParameters param) { 091 params = param; 092 alignGraph = new SimpleGraph<Integer, DefaultEdge>(DefaultEdge.class); 093 levels = new ArrayList<CeSymmResult>(); 094 } 095 096 /** 097 * This method uses iteratively CeSymm to calculate all symmetries in the 098 * input array of atoms and organize them in a multiple alignment of the 099 * repeats. 100 * 101 * @param atoms 102 * atoms 103 * @return CeSymmResult 104 * 105 * @throws StructureException 106 */ 107 public CeSymmResult execute(Atom[] atoms) throws StructureException { 108 109 // First iterate through all levels and then reconstruct all repeats 110 iterate(atoms); 111 return reconstructSymmResult(atoms); 112 113 } 114 115 /** 116 * This method runs iteratively the analysis of one level of symmetry with 117 * CeSymm on the input Atom array until no more symmetries exist. 118 * 119 * @param atoms 120 * representative Atom array of the Structure 121 * @return true if any symmetry was found, false if asymmetric 122 * @throws StructureException 123 */ 124 private void iterate(Atom[] atoms) throws StructureException { 125 126 logger.debug("Starting new iteration..."); 127 128 // Return if the Atom array is too short 129 if ((atoms.length <= params.getWinSize() 130 || atoms.length <= params.getMinCoreLength()) 131 && !levels.isEmpty()) { 132 logger.debug("Aborting iteration due to insufficient Atom " 133 + "array length: %d", atoms.length); 134 return; 135 } 136 137 // Return if the maximum levels of symmetry have been reached 138 if (params.getSymmLevels() > 0) { 139 if (levels.size() == params.getSymmLevels()) 140 return; 141 } 142 143 // Perform one level CeSymm alignment 144 CeSymmResult result = CeSymm.analyzeLevel(atoms, params); 145 146 if (params.getRefineMethod() == RefineMethod.NOT_REFINED 147 || !result.isSignificant()) { 148 if (levels.isEmpty()) 149 levels.add(result); 150 return; 151 } 152 153 // Generate the Atoms of one of the symmetric repeat 154 Integer start = null; 155 int it = 0; 156 while (start == null) { 157 start = result.getMultipleAlignment().getBlocks().get(0) 158 .getAlignRes().get(0).get(it); 159 it++; 160 } 161 Integer end = null; 162 it = result.getMultipleAlignment().getBlocks().get(0).getAlignRes() 163 .get(0).size() - 1; 164 while (end == null) { 165 end = result.getMultipleAlignment().getBlocks().get(0) 166 .getAlignRes().get(0).get(it); 167 it--; 168 } 169 Atom[] atomsR = Arrays.copyOfRange(atoms, start, end + 1); 170 171 // Check the SSE requirement 172 if (countHelixStrandSSE(atomsR) < params.getSSEThreshold()) { 173 if (levels.isEmpty()) 174 levels.add(result); 175 return; 176 } 177 178 // If symmetric store the residue dependencies in alignment graph 179 Block b = result.getMultipleAlignment().getBlock(0); 180 for (int pos = 0; pos < b.length(); pos++) { 181 for (int su = 0; su < b.size() - 1; su++) { 182 Integer pos1 = b.getAlignRes().get(su).get(pos); 183 Integer pos2 = b.getAlignRes().get(su + 1).get(pos); 184 // Add edge from lower to higher positions 185 if (pos1 != null && pos2 != null) { 186 alignGraph.addVertex(pos1); 187 alignGraph.addVertex(pos2); 188 alignGraph.addEdge(pos1, pos2); 189 } 190 } 191 } 192 193 // Iterate further on those Atoms (of the first repeat only) 194 levels.add(result); 195 iterate(atomsR); 196 } 197 198 /** 199 * After all the analysis iterations have finished, the final Result object 200 * is reconstructed using the cumulative alignment graph. 201 * 202 * @param atoms 203 * the original structure atoms 204 * @return CeSymmResult reconstructed symmetry result 205 * @throws StructureException 206 */ 207 private CeSymmResult reconstructSymmResult(Atom[] atoms) 208 throws StructureException { 209 210 // If one level, nothing to build or calculate 211 if (levels.size() == 1) 212 return levels.get(0); 213 214 CeSymmResult result = new CeSymmResult(); 215 result.setSelfAlignment(levels.get(0).getSelfAlignment()); 216 result.setStructureId(levels.get(0).getStructureId()); 217 result.setAtoms(levels.get(0).getAtoms()); 218 result.setParams(levels.get(0).getParams()); 219 220 // Initialize a new multiple alignment 221 MultipleAlignment msa = new MultipleAlignmentImpl(); 222 msa.getEnsemble().setAtomArrays(new ArrayList<Atom[]>()); 223 msa.getEnsemble().setStructureIdentifiers( 224 new ArrayList<StructureIdentifier>()); 225 msa.getEnsemble().setAlgorithmName(CeSymm.algorithmName); 226 msa.getEnsemble().setVersion(CeSymm.version); 227 228 BlockSet bs = new BlockSetImpl(msa); 229 Block b = new BlockImpl(bs); 230 b.setAlignRes(new ArrayList<List<Integer>>()); 231 232 // Calculate the connected groups of the alignment graph 233 ConnectivityInspector<Integer, DefaultEdge> inspector = new ConnectivityInspector<Integer, DefaultEdge>( 234 alignGraph); 235 List<Set<Integer>> comps = inspector.connectedSets(); 236 List<ResidueGroup> groups = new ArrayList<ResidueGroup>(comps.size()); 237 for (Set<Integer> comp : comps) 238 groups.add(new ResidueGroup(comp)); 239 240 // Calculate the total number of repeats 241 int order = 1; 242 for (CeSymmResult sr : levels) 243 order *= sr.getMultipleAlignment().size(); 244 for (int su = 0; su < order; su++) 245 b.getAlignRes().add(new ArrayList<Integer>()); 246 247 // Construct the resulting MultipleAlignment from ResidueGroups 248 for (ResidueGroup group : groups) { 249 if (group.order() != order) 250 continue; 251 group.combineWith(b.getAlignRes()); 252 } 253 254 // The reconstruction failed, so the top level is returned 255 if (b.length() == 0) 256 return levels.get(0); 257 258 for (int su = 0; su < order; su++) { 259 Collections.sort(b.getAlignRes().get(su)); 260 msa.getEnsemble().getAtomArrays().add(atoms); 261 msa.getEnsemble().getStructureIdentifiers() 262 .add(result.getStructureId()); 263 } 264 265 result.setMultipleAlignment(msa); 266 result.setRefined(true); 267 result.setNumRepeats(order); 268 269 SymmetryAxes axes = recoverAxes(result); 270 result.setAxes(axes); 271 272 // Set the transformations and scores of the final alignment 273 SymmetryTools 274 .updateSymmetryTransformation(result.getAxes(), msa); 275 double tmScore = MultipleAlignmentScorer.getAvgTMScore(msa) 276 * msa.size(); 277 double rmsd = MultipleAlignmentScorer.getRMSD(msa); 278 msa.putScore(MultipleAlignmentScorer.AVGTM_SCORE, tmScore); 279 msa.putScore(MultipleAlignmentScorer.RMSD, rmsd); 280 281 return result; 282 } 283 284 /** 285 * The symmetry axes of each level are recovered after the symmetry analysis 286 * iterations have finished, using the stored MultipleAlignment at each 287 * symmetry level. 288 * @return SymmetryAxes 289 */ 290 private SymmetryAxes recoverAxes(CeSymmResult result) { 291 292 SymmetryAxes axes = new SymmetryAxes(); 293 294 for (int m = 0; m < levels.size(); m++) { 295 296 MultipleAlignment align = levels.get(m).getMultipleAlignment(); 297 Matrix4d axis = align.getBlockSet(0).getTransformations().get(1); 298 SymmetryType type = levels.get(m).getAxes().getElementaryAxis(0).getSymmType(); 299 int order = align.size(); 300 301 axes.addAxis(axis, order, type); 302 } 303 return axes; 304 } 305 306 /** 307 * Calculate the number of helix and strand SSE of a repeat. 308 * 309 * @param atoms 310 * Atom array of the repeat found 311 * @return int number of helix or strand SSE 312 */ 313 private static int countHelixStrandSSE(Atom[] atoms) { 314 315 List<SecStrucElement> sses = SecStrucTools 316 .getSecStrucElements(SymmetryTools.getGroups(atoms)); 317 int count = 0; 318 319 //keep track of different helix types 320 boolean helix = false; 321 int hEnd = 0; 322 323 for (SecStrucElement sse : sses) { 324 SecStrucType t = sse.getType(); 325 if (t.isBetaStrand()) { 326 helix = false; 327 count++; 328 } else if (t.isHelixType()){ 329 if (helix){ 330 // If this helix is contiguous to the previous 331 if (sse.getRange().getStart().getSeqNum() + 1 == hEnd) 332 hEnd = sse.getRange().getEnd().getSeqNum(); 333 else 334 count++; 335 } else 336 count++; 337 } else 338 helix = false; 339 } 340 return count; 341 } 342 343}