001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.domain.pdp; 022 023import java.util.ArrayList; 024import java.util.List; 025 026 027 028public class ClusterDomains { 029 030 031 static private boolean verbose = CutDomain.verbose; 032 033 private static int ndom; 034 public static List<Domain> cluster(List<Domain> domains, PDPDistanceMatrix pdpDistMatrix){ 035 036 ndom = domains.size(); 037 038 039 int Si = -1; 040 int Sj = -1; 041 int Sis = -1; 042 int Sjs = -1; 043 int Sim = -1; 044 int Sjm = -1; 045 046 long total_max_contacts = 0; 047 048 double maximum_values = PDPParameters.CUT_OFF_VALUE1S; 049 double maximum_valuem = PDPParameters.CUT_OFF_VALUE1M; 050 double maximum_value = PDPParameters.CUT_OFF_VALUE1; 051 052 053 054 if (ndom < 2) return domains; 055 056 /* 057 for(i=0;i<ndom;i++) 058 domains.get(i).avd=domcont(domains.get(i)); 059 */ 060 061 if(verbose) listdomains (domains); 062 063 do { 064 for(int i=0;i<ndom-1;i++) { 065 for(int j=i+1;j<ndom;j++) { 066 Domain d1 = domains.get(i); 067 Domain d2 = domains.get(j); 068 long total_contacts = getTotalContacts(domains,pdpDistMatrix,d1,d2); 069 System.out.println(" pos: d1:" + i + " vs d2:" +j + " d1:" + d1.getSegmentAtPos(0).getFrom() + "-" + d1.getSegmentAtPos(0).getTo() + " " + d2.getSegmentAtPos(0).getFrom() + "-" + d2.getSegmentAtPos(0).getTo() + " " + total_contacts); 070 int size1dom1=domains.get(i).size; 071 int size2dom2=domains.get(j).size; 072 double minDomSize=Math.min(size1dom1,size2dom2); 073 double maxDomSize=Math.max(size1dom1,size2dom2); 074 075 076 // set some limits on how big the domains can get 077 if(minDomSize>150&&maxDomSize>1.5*minDomSize) maxDomSize=1.5*minDomSize; 078 else if(maxDomSize>2*minDomSize) maxDomSize=2*minDomSize; 079 080 long size1= new Double(Math.min(PDPParameters.MAXSIZE,minDomSize)).longValue(); 081 long size2= new Double(Math.min(PDPParameters.MAXSIZE,maxDomSize)).longValue(); 082 minDomSize=Math.min(Math.pow(minDomSize,1.6/3)+PDPParameters.RG1,Math.pow(minDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1); 083 maxDomSize=Math.min(Math.pow(maxDomSize,1.6/3)+PDPParameters.RG1,Math.pow(maxDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1); 084 085 /* 086 total_max_contacts = 10.+ 087 (long)( (10.*(size1)/(size1+10.)+(size1)*pow((double)(size1),(double)(2./3.))/(size1+10.))*(10.*(size2)/(size2+10.)+(size2)*pow((double)(size2),(double)(2./3.))/(size2+10.))); 088 total_max_contacts = (min(200,size1))*(min(200,size2))/4; 089 */ 090 /* 091 total_max_contacts = min(x*y,MAXCONT); 092 */ 093 total_max_contacts=new Double(minDomSize*maxDomSize*10).longValue(); 094 if(size1>130) total_max_contacts=new Double(minDomSize*maxDomSize*9).longValue(); 095 /* 096 avd=(domains.get(i).avd+domains.get(j).avd)/2; 097 098 S_value=(double)total_contacts/total_max_contacts/avd; 099 */ 100 101 double S_value= total_contacts/(double)total_max_contacts; 102 if(verbose) System.out.println(String.format(" size1=%d size2=%d minDomSize=%5.2f maxDomSize=%5.2f total_contacts = %d ", size1,size2,minDomSize,maxDomSize,total_contacts)); 103 if(verbose) System.out.println(String.format(" total_contacts = %d total_max_contacts = %d", total_contacts, total_max_contacts)); 104 if(verbose) System.out.println(String.format(" maximum_value = %f S_value = %f\n",maximum_value, S_value)); 105 106 if (S_value > maximum_value) { 107 maximum_value = S_value; 108 Si = i; 109 Sj = j; 110 } 111 if (S_value > maximum_valuem&&size1<70) { 112 maximum_valuem = S_value; 113 Sim = i; 114 Sjm = j; 115 } 116 if (S_value > maximum_values&&size1<52) { 117 maximum_values = S_value; 118 Sis = i; 119 Sjs = j; 120 } 121 total_contacts = 0; 122 total_max_contacts = 0; 123 } 124 } 125 126 if ( verbose) { 127 System.out.println("Check for combining: " + maximum_value + " 1 :" + PDPParameters.CUT_OFF_VALUE1); 128 System.out.println(" " + maximum_valuem + " 1M:" + PDPParameters.CUT_OFF_VALUE1M ); 129 System.out.println(" " + maximum_values + " 1S:" + PDPParameters.CUT_OFF_VALUE1S); 130 } 131 132 if (maximum_value > PDPParameters.CUT_OFF_VALUE1) { 133 /* 134 avd=(domains.get(Si).avd+domains.get(Sj).avd)/2; 135 */ 136 if(verbose) System.out.println(" Criteria 1 matched"); 137 if(verbose) System.out.println(String.format(" maximum_value = %f", maximum_value)); 138 if(verbose) System.out.println(String.format(" Si = %d Sj = %d ", Si, Sj)); 139 domains = combine(domains,Si, Sj, maximum_value); 140 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 141 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 142 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 143 /* 144 domains.get(Si).avd=domcont(domains.get(Si)); 145 domains.get(Sj).avd=domcont(domains.get(Sj)); 146 */ 147 if(verbose) System.out.println(String.format(" Listing the domains after combining...")); 148 if(verbose) listdomains (domains); 149 } 150 else if (maximum_valuem > PDPParameters.CUT_OFF_VALUE1M) { 151 /* 152 avd=(domains[Sim].avd+domains[Sjm].avd)/2; 153 */ 154 if(verbose) System.out.println(" Criteria 2 matched"); 155 if(verbose) System.out.println(String.format(" maximum_values = %f", maximum_valuem)); 156 if(verbose) System.out.println(String.format(" Sim = %d Sjm = %d", Sim, Sjm)); 157 domains = combine(domains, Sim, Sjm, maximum_valuem); 158 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 159 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 160 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 161 /* 162 domains[Sim].avd=domcont(domains[Sim]); 163 domains[Sjm].avd=domcont(domains[Sjm]); 164 */ 165 if(verbose) System.out.println(String.format(" Listing the domains after combining...")); 166 if(verbose) listdomains (domains); 167 } 168 else if (maximum_values > PDPParameters.CUT_OFF_VALUE1S) { 169 /* 170 avd=(domains[Sis].avd+domains[Sjs].avd)/2; 171 */ 172 if(verbose) System.out.println(" Criteria 3 matched"); 173 if(verbose) System.out.println(String.format(" maximum_values = %f", maximum_values)); 174 if(verbose) System.out.println(String.format(" Sis = %d Sjs = %d", Sis, Sjs)); 175 domains = combine(domains, Sis, Sjs, maximum_values); 176 maximum_value = PDPParameters.CUT_OFF_VALUE1-.1; 177 maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1; 178 maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1; 179 /* 180 domains[Sis].avd=domcont(domains[Sis]); 181 domains[Sjs].avd=domcont(domains[Sjs]); 182 */ 183 if(verbose) System.out.println(String.format(" Listing the domains after combining...")); 184 if(verbose) listdomains(domains); 185 } 186 else { 187 if(verbose) System.out.println(String.format(" Maximum value is less than cut off value. (max:" + maximum_value+")" )); 188 maximum_value = -1.0; 189 maximum_values = -1.0; 190 maximum_valuem = -1.0; 191 192 } 193 } while ( maximum_value > 0.0||maximum_values>0.0||maximum_valuem>0.0); 194 195 if(verbose) System.out.println(String.format(" The domains are:")); 196 if(verbose) listdomains(domains); 197 return domains; 198 } 199 200 201 202 private static long getTotalContacts(List<Domain> domains, 203 PDPDistanceMatrix pdpDistMatrix, Domain i, Domain j) { 204 long total_contacts=0; 205 206 207 208 209 for(int k=0;k<i.nseg;k++) { 210 for(int l=0;l<j.nseg;l++) { 211 long contacts = calc_S(j.getSegmentAtPos(l).getFrom(), 212 j.getSegmentAtPos(l).getTo(), 213 i.getSegmentAtPos(k).getFrom(), 214 i.getSegmentAtPos(k).getTo(), 215 pdpDistMatrix); 216 total_contacts += contacts; 217 } 218 } 219 return total_contacts; 220 } 221 222 223 224 private static List<Domain> combine(List<Domain> domains,int Si, int Sj, double maximum_value) { 225 226 if ( verbose) 227 System.out.println(" +++ combining domains " + Si + " " + Sj); 228 229 List<Domain> newdoms = new ArrayList<Domain>(); 230 231 //int ndom = domains.size(); 232 for(int i=0;i<domains.get(Sj).nseg;i++) { 233 domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setFrom(domains.get(Sj).getSegmentAtPos(i).getFrom()); 234 domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setTo(domains.get(Sj).getSegmentAtPos(i).getTo()); 235 domains.get(Si).nseg++; 236 } 237 domains.get(Si).size+=domains.get(Sj).size; 238 239 240 for(int i=0;i<domains.get(ndom-1).nseg;i++) { 241 domains.get(Sj).getSegmentAtPos(i).setFrom(domains.get(ndom-1).getSegmentAtPos(i).getFrom()); 242 domains.get(Sj).getSegmentAtPos(i).setTo(domains.get(ndom-1).getSegmentAtPos(i).getTo()); 243 244 } 245 for ( int i =0; i < domains.size(); i++){ 246 if ( i == Sj)continue; 247 newdoms.add(domains.get(i)); 248 } 249 domains.get(Sj).size=domains.get(ndom-1).size; 250 domains.get(Sj).nseg=domains.get(ndom-1).nseg; 251 252 ndom--; 253 return newdoms; 254 255 } 256 257 private static long calc_S (int a1,int b1,int a2,int b2, PDPDistanceMatrix pdpDistMatrix) 258 { 259 260 long contacts = 0; 261 262 int[][] dist = pdpDistMatrix.getDist(); 263 264 for(int i=a1;i<=b1;i++) 265 for(int j=a2;j<=b2;j++) 266 contacts+=dist[i][j]; 267 268 return contacts; 269 } 270 271 private static final void listdomains(List<Domain> domains){ 272 273 int i = -1; 274 for ( Domain dom : domains){ 275 i++; 276 System.out.println("DOMAIN:" + i + " size:" + dom.size + " " + dom.score); 277 List<Segment> segments = dom.getSegments(); 278 279 for ( Segment s : segments){ 280 System.out.println(" Segment: " + s); 281 282 } 283 } 284 } 285}