001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.core; 022 023import java.util.*; 024 025/** 026 * Merges clusters based on their sequence identity. This class does the actual 027 * agglomerative clustering calculation, while {@link SequenceAlignmentCluster} 028 * stores the results. 029 */ 030public class ClusterMerger { 031 private List<SequenceAlignmentCluster> clusters = null; 032 private QuatSymmetryParameters parameters = null; 033 034 List<PairwiseAlignment> pairwiseAlignments = Collections.emptyList(); 035 036 public ClusterMerger(List<SequenceAlignmentCluster> clusters, QuatSymmetryParameters parameters) { 037 this.clusters = clusters; 038 this.parameters = parameters; 039 } 040 041 /** 042 * Aligns all pairs of input clusters, calculating their pairwise alignments 043 */ 044 public void calcPairwiseAlignments() { 045 pairwiseAlignments = new ArrayList<PairwiseAlignment>(); 046 047 boolean[] merged = new boolean[clusters.size()]; 048 Arrays.fill(merged, false); 049 050 for (int i = 0, n = clusters.size(); i < n-1; i++) { 051 if (! merged[i]) { 052 SequenceAlignmentCluster c1 = clusters.get(i); 053 for (int j = i + 1; j < n; j++) { 054 SequenceAlignmentCluster c2 = clusters.get(j); 055 PairwiseAlignment alignment = c1.getPairwiseAlignment(c2); 056 if (alignment != null && 057 alignment.getAlignmentLengthFraction() >= parameters.getAlignmentFractionThreshold() && 058 alignment.getRmsd() <= parameters.getRmsdThreshold()) { 059 merged[j] = true; 060 pairwiseAlignments.add(alignment); 061 if (parameters.isVerbose()) { 062 System.out.println("ClusterMerger: pairwise cluster alignment: " + i + "-" + j + " seq. identity: " + alignment.getSequenceIdentity()); 063 System.out.println(alignment); 064 } 065 } 066 } 067 } 068 } 069 } 070 071 /** 072 * Combine clusters based on the given sequence identity 073 * @param sequenceIdentityCutoff 074 * @return 075 */ 076 public List<SequenceAlignmentCluster> getMergedClusters(double sequenceIdentityCutoff) { 077 List<SequenceAlignmentCluster> mergedClusters = new ArrayList<SequenceAlignmentCluster>(); 078 Map<SequenceAlignmentCluster, Integer> map = getClusterMap(); 079 080 boolean[] processed = new boolean[clusters.size()]; 081 Arrays.fill(processed, false); 082 083 for (int i = 0, n = clusters.size(); i < n; i++) { 084 SequenceAlignmentCluster cluster = clusters.get(i); 085 SequenceAlignmentCluster clone = null; 086 if (! processed[i]) { 087 clone = (SequenceAlignmentCluster) cluster.clone(); 088 mergedClusters.add(clone); 089 processed[i] = true; 090 } 091 092 for (PairwiseAlignment alignment: pairwiseAlignments) { 093 if (alignment.getCluster1() == cluster && alignment.getSequenceIdentity() >= sequenceIdentityCutoff) { 094 clone.setMinSequenceIdentity(Math.min(clone.getMinSequenceIdentity(), alignment.getSequenceIdentity())); 095 clone.setMaxSequenceIdentity(Math.max(clone.getMaxSequenceIdentity(), alignment.getSequenceIdentity())); 096 combineClusters(clone, alignment); 097 int index = map.get(alignment.getCluster2()); 098 processed[index] = true; 099 } 100 } 101 } 102 103 ProteinSequenceClusterer.sortSequenceClustersBySize(mergedClusters); 104 return mergedClusters; 105 } 106 107 108 private Map<SequenceAlignmentCluster, Integer> getClusterMap() { 109 Map<SequenceAlignmentCluster, Integer> map = new HashMap<SequenceAlignmentCluster, Integer>(); 110 for (int i = 0, n = clusters.size(); i < n; i++) { 111 map.put(clusters.get(i), i); 112 } 113 return map; 114 } 115 116 private void combineClusters(SequenceAlignmentCluster c1, PairwiseAlignment alignment) { 117 SequenceAlignmentCluster c2 = (SequenceAlignmentCluster) alignment.getCluster2().clone(); 118 int[][][] align = alignment.getAlignment(); 119 120 // add alignment for reference sequence 121 UniqueSequenceList u =c2.getUniqueSequenceList().get(0); 122 // set new sequence alignment 123 List<Integer> align1 = new ArrayList<Integer>(align[0][0].length); 124 for (Integer a1: align[0][0]) { 125 align1.add(a1); 126 } 127 u.setAlignment1(align1); 128 129 List<Integer> align2 = new ArrayList<Integer>(align[0][1].length); 130 for (Integer a2: align[0][1]) { 131 align2.add(a2); 132 } 133 u.setAlignment2(align2); 134 c1.addUniqueSequenceList(u); 135 136 // note, i starts at 1 (ONE), since i = 0 corresponds to reference sequence, 137 // which has already been processed above 138 for (int i = 1; i < c2.getUniqueSequenceList().size(); i++) { 139 u =c2.getUniqueSequenceList().get(i); 140 List<Integer> oldAlign1 = u.getAlignment1(); 141 List<Integer> oldAlign2 = u.getAlignment2(); 142 List<Integer> newAlign1 = new ArrayList<Integer>(); 143 List<Integer> newAlign2 = new ArrayList<Integer>(); 144 for (int j = 0; j < align2.size(); j++) { 145 Integer element = align2.get(j); 146 Integer index = oldAlign1.indexOf(element); 147 // map alignment to first reference alignment 148 if (index != null && index >= 0) { 149 newAlign1.add(align1.get(j)); 150 newAlign2.add(oldAlign2.get(index)); 151 } 152 } 153 u.setAlignment1(newAlign1); 154 u.setAlignment2(newAlign2); 155 c1.addUniqueSequenceList(u); 156 } 157 } 158}