001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.core; 022 023import org.biojava.nbio.structure.Atom; 024import org.biojava.nbio.structure.StructureException; 025import org.biojava.nbio.structure.align.StructureAlignment; 026import org.biojava.nbio.structure.align.StructureAlignmentFactory; 027import org.biojava.nbio.structure.align.ce.CeMain; 028import org.biojava.nbio.structure.align.ce.CeParameters; 029import org.biojava.nbio.structure.align.model.AFPChain; 030import org.biojava.nbio.structure.align.seq.SmithWaterman3Daligner; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import java.util.*; 035 036/** 037 * Represents a cluster of equivalent sequences 038 * 039 */ 040public class SequenceAlignmentCluster implements Cloneable { 041 042 private static final Logger logger = LoggerFactory.getLogger(SequenceAlignmentCluster.class); 043 044 private QuatSymmetryParameters parameters = null; 045 private List<UniqueSequenceList> uniqueSequenceList = new ArrayList<UniqueSequenceList>(); 046 private List<Atom[]> alignedCAlphaAtoms = null; 047 048 private int alignmentLength = 0; 049 private double minSequenceIdentity = 1.0; 050 private double maxSequenceIdentity = 0.0; 051 052 private boolean modified = true; 053 054 public SequenceAlignmentCluster (QuatSymmetryParameters parameters) { 055 this.parameters = parameters; 056 } 057 058 public boolean isPseudoStoichiometric() { 059 return minSequenceIdentity < parameters.getSequencePseudoSymmetryThreshold(); 060 } 061 062 public double getMinSequenceIdentity() { 063 if (! isPseudoStoichiometric()) { 064 return 1.0; 065 } 066 return minSequenceIdentity; 067 } 068 069 public void setMinSequenceIdentity(double minSequenceIdentity) { 070 this.minSequenceIdentity = minSequenceIdentity; 071 } 072 073 public double getMaxSequenceIdentity() { 074 if (! isPseudoStoichiometric()) { 075 return 1.0; 076 } 077 return maxSequenceIdentity; 078 } 079 080 public void setMaxSequenceIdentity(double maxSequenceIdentity) { 081 this.maxSequenceIdentity = maxSequenceIdentity; 082 } 083 084 public void addUniqueSequenceList(UniqueSequenceList sequenceList) { 085 uniqueSequenceList.add(sequenceList); 086 modified = true; 087 } 088 089 /** 090 * @return the number of sequences which have been added to this cluster 091 */ 092 public int getSequenceCount() { 093 return uniqueSequenceList.size(); 094 } 095 096 public int getSequenceAlignmentLength() { 097 run(); 098 return alignmentLength; 099 } 100 101 public List<UniqueSequenceList> getUniqueSequenceList() { 102 return uniqueSequenceList; 103 } 104 105 public List<String> getChainIds() { 106 List<String> ids = new ArrayList<String>(); 107 for (UniqueSequenceList list: uniqueSequenceList) { 108 ids.add(list.getChainId()); 109 } 110 return ids; 111 } 112 113 public List<Integer> getModelNumbers() { 114 List<Integer> numbers = new ArrayList<Integer>(); 115 for (UniqueSequenceList list: uniqueSequenceList) { 116 numbers.add(list.getModelNumber()); 117 } 118 return numbers; 119 } 120 121 public List<Integer> getStructureIds() { 122 List<Integer> numbers = new ArrayList<Integer>(); 123 for (UniqueSequenceList list: uniqueSequenceList) { 124 numbers.add(list.getStructureId()); 125 } 126 return numbers; 127 } 128 129 public List<Atom[]> getAlignedCalphaAtoms() { 130 run(); 131 return alignedCAlphaAtoms; 132 } 133 134 /** 135 * Match a sequence to this cluster at 100% identity. 136 * 137 * If the given sequence matches the cluster seed (100%), then performs an 138 * alignment to the seed and adds it to the {@link #getUniqueSequenceList() 139 * unique sequence list}. 140 * 141 * @param cAlphaAtoms 142 * @param chainId 143 * @param modelNumber 144 * @param structureId 145 * @param sequence 146 * @return 147 */ 148 public boolean identityMatch(Atom[] cAlphaAtoms, String chainId, int modelNumber, int structureId, String sequence) { 149 UniqueSequenceList u = uniqueSequenceList.get(0); 150 151 // check for 100% identity match of reference sequence 152 String refSequence = u.getSeqResSequence(); 153 boolean seqMatch = refSequence.equals(sequence); 154 155 // if reference (SEQRES) sequences match 100%, 156 // find alignment of atom sequences by Smith-Waterman alignment 157 if (seqMatch) { 158 List<Integer> alig1 = new ArrayList<Integer>(); 159 List<Integer> alig2 = new ArrayList<Integer>(); 160 Atom[] referenceAtoms = u.getCalphaAtoms(); 161 int inCommon = 0; 162 try { 163 inCommon = alignIdenticalSequence(referenceAtoms, cAlphaAtoms, alig1, alig2); 164 } catch (StructureException e) { 165 // this happens in some cases like all X sequences, e.g. 1s1o or 1s4a 166 logger.warn("Could not align identical sequences {}: {} and {}: {}. Chains won't be clustered together.", 167 u.getChainId(),refSequence,chainId,sequence); 168 } 169 170 if (inCommon > 0) { 171 UniqueSequenceList seqList = new UniqueSequenceList(cAlphaAtoms, chainId, modelNumber, structureId, sequence); 172 seqList.setAlignment1(alig1); 173 seqList.setAlignment2(alig2); 174 // System.out.println(alig1); 175 // System.out.println(alig2); 176 addUniqueSequenceList(seqList); 177 return true; 178 } 179 } 180 181 return false; 182 } 183 184 public PairwiseAlignment getPairwiseAlignment(SequenceAlignmentCluster other) { 185 PairwiseAlignment alignment = new PairwiseAlignment(this, other); 186 187 Atom[] referenceAtoms1 = this.getUniqueSequenceList().get(0).getCalphaAtoms(); 188 Atom[] referenceAtoms2 = other.getUniqueSequenceList().get(0).getCalphaAtoms(); 189 190 double alignmentLengthFraction = (double)Math.min(referenceAtoms1.length, referenceAtoms2.length) / 191 Math.max(referenceAtoms1.length, referenceAtoms2.length); 192 193 if (alignmentLengthFraction < parameters.getAlignmentFractionThreshold()) { 194 return null; 195 } 196 197 AFPChain afp = alignPairByStructure(referenceAtoms1, referenceAtoms2,parameters.isVerbose()); 198 if (afp == null) { 199 return null; 200 } 201 202 if (! afp.isSignificantResult()) { 203 return null; 204 205 // alternative: tmSCore: 206 // double tmScore = AFPChainScorer.getTMScore(afpChain, ca1, ca2); 207 // if ( tmScore < 0.35) { 208 // return null ... 209 } 210 211 int[][][] align = afp.getOptAln(); 212 if (align == null) { 213 return null; 214 } 215 216 alignmentLengthFraction = (double)afp.getOptLength()/Math.max(referenceAtoms1.length, referenceAtoms2.length); 217 alignment.setAlignmentLengthFraction(alignmentLengthFraction); 218 alignment.setRmsd(afp.getChainRmsd()); 219 alignment.setSequenceIdentity(afp.getIdentity()); 220 alignment.setAlignment(afp.getOptAln()); 221 222 return alignment; 223 } 224 225 @Override 226 public Object clone() { 227 SequenceAlignmentCluster copy = null; 228 try { 229 copy = (SequenceAlignmentCluster) super.clone(); 230 } catch (CloneNotSupportedException e) { 231 logger.error("CloneNotSupportedException caught",e); 232 } 233 // deep copy sequences 234 copy.uniqueSequenceList = new ArrayList<UniqueSequenceList>(); 235 for (UniqueSequenceList seq: this.getUniqueSequenceList()) { 236 copy.addUniqueSequenceList((UniqueSequenceList) seq.clone()); 237 } 238 return copy; 239 } 240 241 @Override 242 public String toString() { 243 StringBuilder builder = new StringBuilder(); 244 for (UniqueSequenceList u: uniqueSequenceList) { 245 builder.append(u.toString()); 246 builder.append("\n"); 247 } 248 return builder.toString(); 249 } 250 251 private void run() { 252 if (modified) { 253 alignedCAlphaAtoms = null; 254 createAlignedCAlphaAtoms(); 255 modified = false; 256 } 257 } 258 259 private static AFPChain alignPairBySequence(Atom[] ca1Seq, Atom[] ca2Seq) throws StructureException { 260 SmithWaterman3Daligner aligner = new SmithWaterman3Daligner(); 261 return aligner.align(ca1Seq, ca2Seq); 262 } 263 264 private static AFPChain alignPairByStructure(Atom[] ca1Seq, Atom[] ca2Seq, boolean verbose) { 265 CeParameters params = new CeParameters(); 266 267 AFPChain afp = null; 268 try { 269 StructureAlignment algorithm = StructureAlignmentFactory.getAlgorithm(CeMain.algorithmName); 270 afp = algorithm.align(ca1Seq,ca2Seq,params); 271 if (verbose) { 272 System.out.println(afp.toFatcat(ca1Seq, ca2Seq)); 273 } 274 } catch (StructureException e) { 275 logger.error("StructureException caught",e); 276 } 277 return afp; 278 } 279 280 281 private static int alignIdenticalSequence(Atom[] ca1Seq, Atom[] ca2Seq, List<Integer> align1, List<Integer> align2) throws StructureException { 282 AFPChain afp = alignPairBySequence(ca1Seq, ca2Seq); 283 int[][][] align = afp.getOptAln(); 284 if (align == null) { 285 return 0; 286 } 287 int len = afp.getOptLength(); 288 289 List<Integer> delta = new ArrayList<Integer>(); 290 Set<Integer> unique = new HashSet<Integer>(); 291 292 for (int i = 0; i < len; i++) { 293 Atom a1 = ca1Seq[align[0][0][i]]; 294 String residueName1 = a1.getGroup().getPDBName(); 295 Atom a2 = ca2Seq[align[0][1][i]]; 296 String residueName2 = a2.getGroup().getPDBName(); 297 if (residueName1.equals(residueName2)) { 298 int n1 = a1.getGroup().getResidueNumber().getSeqNum(); 299 int n2 = a2.getGroup().getResidueNumber().getSeqNum(); 300 delta.add(n2-n1); 301 unique.add(n2-n1); 302 } 303 } 304 305 int offset = 0; 306 int frequency = 0; 307 for (Integer i: unique) { 308 int freq = Collections.frequency(delta, i); 309 if (freq > frequency) { 310 offset = i; 311 frequency = freq; 312 } 313 } 314 315 for (int i = 0; i < len; i++) { 316 Atom a1 = ca1Seq[align[0][0][i]]; 317 int n1 = a1.getGroup().getResidueNumber().getSeqNum(); 318 Atom a2 = ca2Seq[align[0][1][i]]; 319 int n2 = a2.getGroup().getResidueNumber().getSeqNum(); 320 if (n2 - offset == n1) { 321 align1.add(align[0][0][i]); 322 align2.add(align[0][1][i]); 323 } 324 } 325// System.out.println("PDB alignment: "); 326// System.out.println(align1); 327// System.out.println(align2); 328 return align1.size(); 329 } 330 331 private void createAlignedCAlphaAtoms() { 332 List<Integer> indices = getReferenceResidueIndices(); 333 alignmentLength = indices.size(); 334 alignedCAlphaAtoms = new ArrayList<Atom[]>(); 335 for (UniqueSequenceList u: uniqueSequenceList) { 336 List<Integer> alignment1 = u.getAlignment1(); 337 List<Integer> alignment2 = u.getAlignment2(); 338 List<Integer> alignmentIndices = new ArrayList<Integer>(); 339 for (int i = 0; i < alignment1.size(); i++) { 340 int a1 = alignment1.get(i); 341 if (indices.contains(a1)) { 342 alignmentIndices.add(alignment2.get(i)); 343 } 344 } 345 Atom[] unalignedAtoms = u.getCalphaAtoms(); 346 Atom[] alignedAtoms = new Atom[alignmentIndices.size()]; 347 for (int j = 0; j < alignedAtoms.length; j++) { 348 alignedAtoms[j] = unalignedAtoms[alignmentIndices.get(j)]; 349 } 350 alignedCAlphaAtoms.add(alignedAtoms); 351 } 352 } 353 354 private List<Integer> getReferenceResidueIndices() { 355 List<Integer> indices = new ArrayList<Integer>(uniqueSequenceList.get(0).getAlignment1()); 356 for (UniqueSequenceList u: uniqueSequenceList) { 357 indices.retainAll(u.getAlignment1()); 358 } 359 return indices; 360 } 361}