001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.domain.pdp;
022
023import java.util.ArrayList;
024import java.util.List;
025
026
027
028public class ClusterDomains {
029
030
031        static private boolean verbose = CutDomain.verbose;
032
033        private static int ndom;
034        public static List<Domain> cluster(List<Domain> domains, PDPDistanceMatrix pdpDistMatrix){
035
036                ndom = domains.size();
037
038
039                int Si = -1;
040                int Sj = -1;
041                int Sis = -1;
042                int Sjs = -1;
043                int Sim = -1;
044                int Sjm = -1;
045
046                long total_max_contacts = 0;
047
048                double maximum_values = PDPParameters.CUT_OFF_VALUE1S;
049                double maximum_valuem = PDPParameters.CUT_OFF_VALUE1M;
050                double maximum_value  = PDPParameters.CUT_OFF_VALUE1;
051
052
053
054                if (ndom < 2) return domains;
055
056                /*
057        for(i=0;i<ndom;i++)
058                domains.get(i).avd=domcont(domains.get(i));
059                 */
060
061                if(verbose) listdomains (domains);
062
063                do {
064                        for(int i=0;i<ndom-1;i++) {
065                                for(int j=i+1;j<ndom;j++) {
066                                        Domain d1 = domains.get(i);
067                                        Domain d2 = domains.get(j);
068                                        long total_contacts = getTotalContacts(domains,pdpDistMatrix,d1,d2);
069                                        System.out.println(" pos: d1:" + i + " vs d2:" +j + " d1:" + d1.getSegmentAtPos(0).getFrom() + "-" + d1.getSegmentAtPos(0).getTo() + " " +  d2.getSegmentAtPos(0).getFrom() + "-" + d2.getSegmentAtPos(0).getTo() + " " + total_contacts);
070                                        int size1dom1=domains.get(i).size;
071                                        int size2dom2=domains.get(j).size;
072                                        double minDomSize=Math.min(size1dom1,size2dom2);
073                                        double maxDomSize=Math.max(size1dom1,size2dom2);
074
075
076                                        // set some limits on how big the domains can get
077                                        if(minDomSize>150&&maxDomSize>1.5*minDomSize) maxDomSize=1.5*minDomSize;
078                                        else if(maxDomSize>2*minDomSize) maxDomSize=2*minDomSize;
079
080                                        long size1= new Double(Math.min(PDPParameters.MAXSIZE,minDomSize)).longValue();
081                                        long size2= new Double(Math.min(PDPParameters.MAXSIZE,maxDomSize)).longValue();
082                                        minDomSize=Math.min(Math.pow(minDomSize,1.6/3)+PDPParameters.RG1,Math.pow(minDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1);
083                                        maxDomSize=Math.min(Math.pow(maxDomSize,1.6/3)+PDPParameters.RG1,Math.pow(maxDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1);
084
085                                        /*
086                                total_max_contacts = 10.+
087        (long)( (10.*(size1)/(size1+10.)+(size1)*pow((double)(size1),(double)(2./3.))/(size1+10.))*(10.*(size2)/(size2+10.)+(size2)*pow((double)(size2),(double)(2./3.))/(size2+10.)));
088                                total_max_contacts = (min(200,size1))*(min(200,size2))/4;
089                                         */
090                                        /*
091                                total_max_contacts = min(x*y,MAXCONT);
092                                         */
093                                        total_max_contacts=new Double(minDomSize*maxDomSize*10).longValue();
094                                        if(size1>130) total_max_contacts=new Double(minDomSize*maxDomSize*9).longValue();
095                                        /*
096        avd=(domains.get(i).avd+domains.get(j).avd)/2;
097
098                                S_value=(double)total_contacts/total_max_contacts/avd;
099                                         */
100
101                                        double S_value= total_contacts/(double)total_max_contacts;
102                                        if(verbose) System.out.println(String.format(" size1=%d size2=%d minDomSize=%5.2f maxDomSize=%5.2f total_contacts = %d ", size1,size2,minDomSize,maxDomSize,total_contacts));
103                                        if(verbose) System.out.println(String.format(" total_contacts = %d total_max_contacts = %d", total_contacts, total_max_contacts));
104                                        if(verbose) System.out.println(String.format(" maximum_value = %f S_value = %f\n",maximum_value, S_value));
105
106                                        if (S_value > maximum_value) {
107                                                maximum_value = S_value;
108                                                Si = i;
109                                                Sj = j;
110                                        }
111                                        if (S_value > maximum_valuem&&size1<70) {
112                                                maximum_valuem = S_value;
113                                                Sim = i;
114                                                Sjm = j;
115                                        }
116                                        if (S_value > maximum_values&&size1<52) {
117                                                maximum_values = S_value;
118                                                Sis = i;
119                                                Sjs = j;
120                                        }
121                                        total_contacts = 0;
122                                        total_max_contacts = 0;
123                                }
124                        }
125
126                        if ( verbose) {
127                                System.out.println("Check for combining: " + maximum_value  + " 1 :" + PDPParameters.CUT_OFF_VALUE1);
128                                System.out.println("                     " + maximum_valuem + " 1M:" + PDPParameters.CUT_OFF_VALUE1M );
129                                System.out.println("                     " + maximum_values + " 1S:" + PDPParameters.CUT_OFF_VALUE1S);
130                        }
131
132                        if (maximum_value > PDPParameters.CUT_OFF_VALUE1) {
133                                /*
134                        avd=(domains.get(Si).avd+domains.get(Sj).avd)/2;
135                                 */
136                                if(verbose) System.out.println(" Criteria 1 matched");
137                                if(verbose) System.out.println(String.format(" maximum_value = %f", maximum_value));
138                                if(verbose) System.out.println(String.format(" Si = %d Sj = %d ", Si, Sj));
139                                domains = combine(domains,Si, Sj, maximum_value);
140                                maximum_value = PDPParameters.CUT_OFF_VALUE1-.1;
141                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
142                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
143                                /*
144                        domains.get(Si).avd=domcont(domains.get(Si));
145                        domains.get(Sj).avd=domcont(domains.get(Sj));
146                                 */
147                                if(verbose) System.out.println(String.format(" Listing the domains after combining..."));
148                                if(verbose) listdomains (domains);
149                        }
150                        else if (maximum_valuem > PDPParameters.CUT_OFF_VALUE1M) {
151                                /*
152                        avd=(domains[Sim].avd+domains[Sjm].avd)/2;
153                                 */
154                                if(verbose) System.out.println(" Criteria 2 matched");
155                                if(verbose) System.out.println(String.format(" maximum_values = %f", maximum_valuem));
156                                if(verbose) System.out.println(String.format(" Sim = %d Sjm = %d", Sim, Sjm));
157                                domains = combine(domains, Sim, Sjm, maximum_valuem);
158                                maximum_value =  PDPParameters.CUT_OFF_VALUE1-.1;
159                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
160                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
161                                /*
162                        domains[Sim].avd=domcont(domains[Sim]);
163                        domains[Sjm].avd=domcont(domains[Sjm]);
164                                 */
165                                if(verbose) System.out.println(String.format(" Listing the domains after combining..."));
166                                if(verbose) listdomains (domains);
167                        }
168                        else if (maximum_values > PDPParameters.CUT_OFF_VALUE1S) {
169                                /*
170                        avd=(domains[Sis].avd+domains[Sjs].avd)/2;
171                                 */
172                                if(verbose) System.out.println(" Criteria 3 matched");
173                                if(verbose) System.out.println(String.format(" maximum_values = %f", maximum_values));
174                                if(verbose) System.out.println(String.format(" Sis = %d Sjs = %d", Sis, Sjs));
175                                domains = combine(domains, Sis, Sjs, maximum_values);
176                                maximum_value = PDPParameters.CUT_OFF_VALUE1-.1;
177                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
178                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
179                                /*
180                        domains[Sis].avd=domcont(domains[Sis]);
181                        domains[Sjs].avd=domcont(domains[Sjs]);
182                                 */
183                                if(verbose) System.out.println(String.format(" Listing the domains after combining..."));
184                                if(verbose) listdomains(domains);
185                        }
186                        else {
187                                if(verbose) System.out.println(String.format(" Maximum value is less than cut off value. (max:" + maximum_value+")" ));
188                                maximum_value = -1.0;
189                                maximum_values = -1.0;
190                                maximum_valuem = -1.0;
191
192                        }
193                } while ( maximum_value > 0.0||maximum_values>0.0||maximum_valuem>0.0);
194
195                if(verbose) System.out.println(String.format(" The domains are:"));
196                if(verbose) listdomains(domains);
197                return domains;
198        }
199
200
201
202        private static long getTotalContacts(List<Domain> domains,
203                        PDPDistanceMatrix pdpDistMatrix, Domain i, Domain j) {
204                long total_contacts=0;
205
206
207
208
209                for(int k=0;k<i.nseg;k++) {
210                        for(int l=0;l<j.nseg;l++) {
211                                long contacts = calc_S(j.getSegmentAtPos(l).getFrom(),
212                                                j.getSegmentAtPos(l).getTo(),
213                                                i.getSegmentAtPos(k).getFrom(),
214                                                i.getSegmentAtPos(k).getTo(),
215                                                pdpDistMatrix);
216                                total_contacts += contacts;
217                        }
218                }
219                return total_contacts;
220        }
221
222
223
224        private static List<Domain> combine(List<Domain> domains,int Si, int Sj, double maximum_value) {
225
226                if ( verbose)
227                        System.out.println("  +++  combining domains " + Si + " " + Sj);
228
229                List<Domain> newdoms = new ArrayList<Domain>();
230
231                //int ndom = domains.size();
232                for(int i=0;i<domains.get(Sj).nseg;i++) {
233                        domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setFrom(domains.get(Sj).getSegmentAtPos(i).getFrom());
234                        domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setTo(domains.get(Sj).getSegmentAtPos(i).getTo());
235                        domains.get(Si).nseg++;
236                }
237                domains.get(Si).size+=domains.get(Sj).size;
238
239
240                for(int i=0;i<domains.get(ndom-1).nseg;i++) {
241                        domains.get(Sj).getSegmentAtPos(i).setFrom(domains.get(ndom-1).getSegmentAtPos(i).getFrom());
242                        domains.get(Sj).getSegmentAtPos(i).setTo(domains.get(ndom-1).getSegmentAtPos(i).getTo());
243
244                }
245                for ( int i =0; i < domains.size(); i++){
246                        if ( i == Sj)continue;
247                        newdoms.add(domains.get(i));
248                }
249                domains.get(Sj).size=domains.get(ndom-1).size;
250                domains.get(Sj).nseg=domains.get(ndom-1).nseg;
251
252                ndom--;
253                return newdoms;
254
255        }
256
257        private static long calc_S (int a1,int b1,int a2,int b2, PDPDistanceMatrix pdpDistMatrix)
258        {
259
260                long contacts = 0;
261
262                int[][] dist = pdpDistMatrix.getDist();
263
264                for(int i=a1;i<=b1;i++)
265                        for(int j=a2;j<=b2;j++)
266                                contacts+=dist[i][j];
267
268                return contacts;
269        }
270
271        private static final void listdomains(List<Domain> domains){
272
273                int i = -1;
274                for ( Domain dom : domains){
275                        i++;
276                        System.out.println("DOMAIN:" + i + " size:" + dom.size + " " +  dom.score);
277                        List<Segment> segments = dom.getSegments();
278
279                        for ( Segment s : segments){
280                                System.out.println("   Segment: " + s);
281
282                        }
283                }
284        }
285}