001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.domain.pdp; 022 023import java.util.ArrayList; 024import java.util.List; 025 026 027 028public class ClusterDomains { 029 030 031 static private boolean verbose = CutDomain.verbose; 032 033 private static int ndom; 034 public static List<Domain> cluster(List<Domain> domains, PDPDistanceMatrix pdpDistMatrix){ 035 036 ndom = domains.size(); 037 038 039 int Si = -1; 040 int Sj = -1; 041 int Sis = -1; 042 int Sjs = -1; 043 int Sim = -1; 044 int Sjm = -1; 045 046 long total_max_contacts = 0; 047 048 double maximum_values = PDPParameters.CUT_OFF_VALUE1S; 049 double maximum_valuem = PDPParameters.CUT_OFF_VALUE1M; 050 double maximum_value = PDPParameters.CUT_OFF_VALUE1; 051 052 053 054 if (ndom < 2) return domains; 055 056 /* 057 for(i=0;i<ndom;i++) 058 domains.get(i).avd=domcont(domains.get(i)); 059 */ 060 061 if(verbose) listdomains (domains); 062 063 do { 064 for(int i=0;i<ndom-1;i++) { 065 for(int j=i+1;j<ndom;j++) { 066 Domain d1 = domains.get(i); 067 Domain d2 = domains.get(j); 068 long total_contacts = getTotalContacts(domains,pdpDistMatrix,d1,d2); 069 System.out.println(" pos: d1:" + i + " vs d2:" +j + " d1:" + d1.getSegmentAtPos(0).getFrom() + "-" + d1.getSegmentAtPos(0).getTo() + " " + d2.getSegmentAtPos(0).getFrom() + "-" + d2.getSegmentAtPos(0).getTo() + " " + total_contacts); 070 int size1dom1=domains.get(i).size; 071 int size2dom2=domains.get(j).size; 072 double minDomSize=Math.min(size1dom1,size2dom2); 073 double maxDomSize=Math.max(size1dom1,size2dom2); 074 075 076 // set some limits on how big the domains can get 077 if(minDomSize>150&&maxDomSize>1.5*minDomSize) maxDomSize=1.5*minDomSize; 078 else if(maxDomSize>2*minDomSize) maxDomSize=2*minDomSize; 079 080 long size1= new Double(Math.min(PDPParameters.MAXSIZE,minDomSize)).longValue(); 081 long size2= new Double(Math.min(PDPParameters.MAXSIZE,maxDomSize)).longValue(); 082 minDomSize=Math.min(Math.pow(minDomSize,1.6/3)+PDPParameters.RG1,Math.pow(minDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1); 083 maxDomSize=Math.min(Math.pow(maxDomSize,1.6/3)+PDPParameters.RG1,Math.pow(maxDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1); 084 085 /* 086 total_max_contacts = 10.+ 087 (long)( (10.*(size1)/(size1+10.)+(size1)*pow((double)(size1),(double)(2./3.))/(size1+10.))*(10.*(size2)/(size2+10.)+(size2)*pow((double)(size2),(double)(2./3.))/(size2+10.))); 088 total_max_contacts = (min(200,size1))*(min(200,size2))/4; 089 */ 090 /* 091 total_max_contacts = min(x*y,MAXCONT); 092 */ 093 total_max_contacts=new Double(minDomSize*maxDomSize*10).longValue(); 094 if(size1>130) total_max_contacts=new Double(minDomSize*maxDomSize*9).longValue(); 095 /* 096 avd=(domains.get(i).avd+domains.get(j).avd)/2; 097 098 S_value=(double)total_contacts/total_max_contacts/avd; 099 */ 100 101 double S_value= total_contacts/(double)total_max_contacts; 102 if(verbose) { 103 System.out.printf(" size1=%d size2=%d minDomSize=%5.2f maxDomSize=%5.2f total_contacts = %d %n", size1,size2,minDomSize,maxDomSize,total_contacts); 104 System.out.printf(" total_contacts = %d total_max_contacts = %d%n", total_contacts, total_max_contacts); 105 System.out.printf(" maximum_value = %f S_value = %f%n%n",maximum_value, S_value); 106 } 107 108 if (S_value > maximum_value) { 109 maximum_value = S_value; 110 Si = i; 111 Sj = j; 112 } 113 if (S_value > maximum_valuem&&size1<70) { 114 maximum_valuem = S_value; 115 Sim = i; 116 Sjm = j; 117 } 118 if (S_value > maximum_values&&size1<52) { 119 maximum_values = S_value; 120 Sis = i; 121 Sjs = j; 122 } 123 total_contacts = 0; 124 total_max_contacts = 0; 125 } 126 } 127 128 if ( verbose) { 129 System.out.println("Check for combining: " + maximum_value + " 1 :" + PDPParameters.CUT_OFF_VALUE1); 130 System.out.println(" " + maximum_valuem + " 1M:" + PDPParameters.CUT_OFF_VALUE1M ); 131 System.out.println(" " + maximum_values + " 1S:" + PDPParameters.CUT_OFF_VALUE1S); 132 } 133 134 if (maximum_value > PDPParameters.CUT_OFF_VALUE1) { 135 /* 136 avd=(domains.get(Si).avd+domains.get(Sj).avd)/2; 137 */ 138 if(verbose) System.out.println(" Criteria 1 matched"); 139 if(verbose) System.out.printf(" maximum_value = %f%n", maximum_value); 140 if(verbose) System.out.printf(" Si = %d Sj = %d %n", Si, Sj); 141 domains = combine(domains,Si, Sj, maximum_value); 142 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 143 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 144 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 145 /* 146 domains.get(Si).avd=domcont(domains.get(Si)); 147 domains.get(Sj).avd=domcont(domains.get(Sj)); 148 */ 149 if(verbose) System.out.println(String.format(" Listing the domains after combining...")); 150 if(verbose) listdomains (domains); 151 } 152 else if (maximum_valuem > PDPParameters.CUT_OFF_VALUE1M) { 153 /* 154 avd=(domains[Sim].avd+domains[Sjm].avd)/2; 155 */ 156 if(verbose) System.out.println(" Criteria 2 matched"); 157 if(verbose) System.out.printf(" maximum_values = %f%n", maximum_valuem); 158 if(verbose) System.out.printf(" Sim = %d Sjm = %d%n", Sim, Sjm); 159 domains = combine(domains, Sim, Sjm, maximum_valuem); 160 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 161 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 162 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 163 /* 164 domains[Sim].avd=domcont(domains[Sim]); 165 domains[Sjm].avd=domcont(domains[Sjm]); 166 */ 167 if(verbose) System.out.println(String.format(" Listing the domains after combining...")); 168 if(verbose) listdomains (domains); 169 } 170 else if (maximum_values > PDPParameters.CUT_OFF_VALUE1S) { 171 /* 172 avd=(domains[Sis].avd+domains[Sjs].avd)/2; 173 */ 174 if(verbose) System.out.println(" Criteria 3 matched"); 175 if(verbose) System.out.printf(" maximum_values = %f%n", maximum_values); 176 if(verbose) System.out.printf(" Sis = %d Sjs = %d%n", Sis, Sjs); 177 domains = combine(domains, Sis, Sjs, maximum_values); 178 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 179 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 180 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 181 /* 182 domains[Sis].avd=domcont(domains[Sis]); 183 domains[Sjs].avd=domcont(domains[Sjs]); 184 */ 185 if(verbose) System.out.println(" Listing the domains after combining..."); 186 if(verbose) listdomains(domains); 187 } 188 else { 189 if(verbose) System.out.printf(" Maximum value is less than cut off value. (max:%f)%n", maximum_value); 190 maximum_value = -1.0; 191 maximum_values = -1.0; 192 maximum_valuem = -1.0; 193 194 } 195 } while ( maximum_value > 0.0||maximum_values>0.0||maximum_valuem>0.0); 196 197 if(verbose) System.out.println(String.format(" The domains are:")); 198 if(verbose) listdomains(domains); 199 return domains; 200 } 201 202 203 204 private static long getTotalContacts(List<Domain> domains, 205 PDPDistanceMatrix pdpDistMatrix, Domain i, Domain j) { 206 long total_contacts=0; 207 208 209 210 211 for(int k=0;k<i.nseg;k++) { 212 for(int l=0;l<j.nseg;l++) { 213 long contacts = calc_S(j.getSegmentAtPos(l).getFrom(), 214 j.getSegmentAtPos(l).getTo(), 215 i.getSegmentAtPos(k).getFrom(), 216 i.getSegmentAtPos(k).getTo(), 217 pdpDistMatrix); 218 total_contacts += contacts; 219 } 220 } 221 return total_contacts; 222 } 223 224 225 226 private static List<Domain> combine(List<Domain> domains,int Si, int Sj, double maximum_value) { 227 228 if ( verbose) 229 System.out.println(" +++ combining domains " + Si + " " + Sj); 230 231 List<Domain> newdoms = new ArrayList<Domain>(); 232 233 //int ndom = domains.size(); 234 for(int i=0;i<domains.get(Sj).nseg;i++) { 235 domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setFrom(domains.get(Sj).getSegmentAtPos(i).getFrom()); 236 domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setTo(domains.get(Sj).getSegmentAtPos(i).getTo()); 237 domains.get(Si).nseg++; 238 } 239 domains.get(Si).size+=domains.get(Sj).size; 240 241 242 for(int i=0;i<domains.get(ndom-1).nseg;i++) { 243 domains.get(Sj).getSegmentAtPos(i).setFrom(domains.get(ndom-1).getSegmentAtPos(i).getFrom()); 244 domains.get(Sj).getSegmentAtPos(i).setTo(domains.get(ndom-1).getSegmentAtPos(i).getTo()); 245 246 } 247 for ( int i =0; i < domains.size(); i++){ 248 if ( i == Sj)continue; 249 newdoms.add(domains.get(i)); 250 } 251 domains.get(Sj).size=domains.get(ndom-1).size; 252 domains.get(Sj).nseg=domains.get(ndom-1).nseg; 253 254 ndom--; 255 return newdoms; 256 257 } 258 259 private static long calc_S (int a1,int b1,int a2,int b2, PDPDistanceMatrix pdpDistMatrix) 260 { 261 262 long contacts = 0; 263 264 int[][] dist = pdpDistMatrix.getDist(); 265 266 for(int i=a1;i<=b1;i++) 267 for(int j=a2;j<=b2;j++) 268 contacts+=dist[i][j]; 269 270 return contacts; 271 } 272 273 private static final void listdomains(List<Domain> domains){ 274 275 int i = -1; 276 for ( Domain dom : domains){ 277 i++; 278 System.out.println("DOMAIN:" + i + " size:" + dom.size + " " + dom.score); 279 List<Segment> segments = dom.getSegments(); 280 281 for ( Segment s : segments){ 282 System.out.println(" Segment: " + s); 283 284 } 285 } 286 } 287}