/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align;

import org.biojava.nbio.structure.align.pairwise.AlternativeAlignment;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * Groups alternative alignments into numbered clusters based on how many
 * of their {@code getIdx1()} positions they have in common.
 *
 * @author Andreas Prlic
 * @since 1.5
 * @version %I% %G%
 */
public class ClusterAltAligs {

	/** Percentage threshold used by {@link #cluster(AlternativeAlignment[])}. */
	public static final int DEFAULT_CLUSTER_CUTOFF = 95;


	/**
	 * Clusters the given alignments using {@link #DEFAULT_CLUSTER_CUTOFF}.
	 *
	 * @param aligs the alignments to cluster; each one receives a cluster
	 *              number through {@code setCluster}
	 */
	public static void cluster(AlternativeAlignment[] aligs) {
		cluster(aligs, DEFAULT_CLUSTER_CUTOFF);
	}

	/**
	 * Clusters the given alignments greedily, in array order.
	 * <p>
	 * The first alignment that does not yet belong to a cluster becomes the
	 * seed of a new cluster. Every other unassigned alignment joins that
	 * cluster when the percentage of the seed's {@code getIdx1()} positions
	 * that also occur in its own {@code getIdx1()} array is strictly greater
	 * than {@code cutoff}. The percentage is always relative to the length
	 * of the seed's array, so the comparison is not symmetric.
	 * <p>
	 * Once all clusters are built, they are numbered starting at 1 in the
	 * order they were created and the number is stored on each member via
	 * {@code setCluster}.
	 *
	 * @param aligs  the alignments to cluster
	 * @param cutoff percentage a candidate's overlap with the seed has to
	 *               exceed in order to join the seed's cluster
	 */
	public static void cluster(AlternativeAlignment[] aligs, int cutoff) {

		// snapshot of the input; positions in this list match positions in aligs
		List<AlternativeAlignment> candidates = new ArrayList<>(Arrays.asList(aligs));

		// alignments that already belong to some cluster
		List<AlternativeAlignment> assigned = new ArrayList<>();

		// each entry holds the array positions of the members of one cluster
		List<List<Integer>> groups = new ArrayList<>();

		for (int seedPos = 0; seedPos < aligs.length; seedPos++) {
			AlternativeAlignment seed = aligs[seedPos];
			if (assigned.contains(seed)) {
				continue;
			}
			int[] seedIdx = seed.getIdx1();

			List<Integer> members = new ArrayList<>();
			members.add(seedPos);
			assigned.add(seed);

			// set as soon as one candidate is compared but not absorbed
			boolean anyLeft = false;

			for (int otherPos = 0; otherPos < candidates.size(); otherPos++) {
				AlternativeAlignment other = candidates.get(otherPos);
				if (assigned.contains(other)) {
					continue;
				}

				if (sharedPercent(seedIdx, other.getIdx1()) > cutoff) {
					members.add(otherPos);
					assigned.add(other);
				} else {
					anyLeft = true;
				}
			}

			groups.add(members);

			// nothing is left to seed another cluster
			if (!anyLeft) {
				break;
			}
		}

		// hand out 1-based cluster numbers in creation order
		int clusterId = 0;
		for (List<Integer> members : groups) {
			clusterId++;
			for (int member : members) {
				aligs[member].setCluster(clusterId);
			}
		}
	}

	/**
	 * Counts every pair of equal values between the two arrays and expresses
	 * that count as a percentage of the length of {@code reference}.
	 *
	 * @param reference positions of the cluster seed
	 * @param other     positions of the candidate alignment
	 * @return matching pairs divided by {@code reference.length}, times 100;
	 *         NaN when {@code reference} is empty
	 */
	private static float sharedPercent(int[] reference, int[] other) {
		int matches = 0;
		for (int refPos : reference) {
			for (int otherPos : other) {
				if (refPos == otherPos) {
					matches++;
				}
			}
		}
		return (matches / (float) reference.length) * 100;
	}
}