001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.domain.pdp;
022
023import java.util.ArrayList;
024import java.util.List;
025
026
027
028public class ClusterDomains {
029
030
031        static private boolean verbose = CutDomain.verbose;
032
033        private static int ndom;
034        public static List<Domain> cluster(List<Domain> domains, PDPDistanceMatrix pdpDistMatrix){
035
036                ndom = domains.size();
037
038
039                int Si = -1;
040                int Sj = -1;
041                int Sis = -1;
042                int Sjs = -1;
043                int Sim = -1;
044                int Sjm = -1;
045
046                long total_max_contacts = 0;
047
048                double maximum_values = PDPParameters.CUT_OFF_VALUE1S;
049                double maximum_valuem = PDPParameters.CUT_OFF_VALUE1M;
050                double maximum_value  = PDPParameters.CUT_OFF_VALUE1;
051
052
053
054                if (ndom < 2) return domains;
055
056                /*
057        for(i=0;i<ndom;i++)
058                domains.get(i).avd=domcont(domains.get(i));
059                 */
060
061                if(verbose) listdomains (domains);
062
063                do {
064                        for(int i=0;i<ndom-1;i++) {
065                                for(int j=i+1;j<ndom;j++) {
066                                        Domain d1 = domains.get(i);
067                                        Domain d2 = domains.get(j);
068                                        long total_contacts = getTotalContacts(domains,pdpDistMatrix,d1,d2);
069                                        System.out.println(" pos: d1:" + i + " vs d2:" +j + " d1:" + d1.getSegmentAtPos(0).getFrom() + "-" + d1.getSegmentAtPos(0).getTo() + " " +  d2.getSegmentAtPos(0).getFrom() + "-" + d2.getSegmentAtPos(0).getTo() + " " + total_contacts);
070                                        int size1dom1=domains.get(i).size;
071                                        int size2dom2=domains.get(j).size;
072                                        double minDomSize=Math.min(size1dom1,size2dom2);
073                                        double maxDomSize=Math.max(size1dom1,size2dom2);
074
075
076                                        // set some limits on how big the domains can get
077                                        if(minDomSize>150&&maxDomSize>1.5*minDomSize) maxDomSize=1.5*minDomSize;
078                                        else if(maxDomSize>2*minDomSize) maxDomSize=2*minDomSize;
079
080                                        long size1= Double.valueOf(Math.min(PDPParameters.MAXSIZE,minDomSize)).longValue();
081                                        long size2= Double.valueOf(Math.min(PDPParameters.MAXSIZE,maxDomSize)).longValue();
082                                        minDomSize=Math.min(Math.pow(minDomSize,1.6/3)+PDPParameters.RG1,Math.pow(minDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1);
083                                        maxDomSize=Math.min(Math.pow(maxDomSize,1.6/3)+PDPParameters.RG1,Math.pow(maxDomSize,1.4/3)+Math.pow(PDPParameters.TD1,1.6/3)+PDPParameters.RG1);
084
085                                        /*
086                                total_max_contacts = 10.+
087        (long)( (10.*(size1)/(size1+10.)+(size1)*pow((double)(size1),(double)(2./3.))/(size1+10.))*(10.*(size2)/(size2+10.)+(size2)*pow((double)(size2),(double)(2./3.))/(size2+10.)));
088                                total_max_contacts = (min(200,size1))*(min(200,size2))/4;
089                                         */
090                                        /*
091                                total_max_contacts = min(x*y,MAXCONT);
092                                         */
093                                        total_max_contacts=Double.valueOf(minDomSize*maxDomSize*10).longValue();
094                                        if(size1>130) total_max_contacts=Double.valueOf(minDomSize*maxDomSize*9).longValue();
095                                        /*
096        avd=(domains.get(i).avd+domains.get(j).avd)/2;
097
098                                S_value=(double)total_contacts/total_max_contacts/avd;
099                                         */
100
101                                        double S_value= total_contacts/(double)total_max_contacts;
102                                        if(verbose) {
103                                                System.out.printf(" size1=%d size2=%d minDomSize=%5.2f maxDomSize=%5.2f total_contacts = %d %n", size1,size2,minDomSize,maxDomSize,total_contacts);
104                                                System.out.printf(" total_contacts = %d total_max_contacts = %d%n", total_contacts, total_max_contacts);
105                                                System.out.printf(" maximum_value = %f S_value = %f%n%n",maximum_value, S_value);
106                                        }
107
108                                        if (S_value > maximum_value) {
109                                                maximum_value = S_value;
110                                                Si = i;
111                                                Sj = j;
112                                        }
113                                        if (S_value > maximum_valuem&&size1<70) {
114                                                maximum_valuem = S_value;
115                                                Sim = i;
116                                                Sjm = j;
117                                        }
118                                        if (S_value > maximum_values&&size1<52) {
119                                                maximum_values = S_value;
120                                                Sis = i;
121                                                Sjs = j;
122                                        }
123                                        total_contacts = 0;
124                                        total_max_contacts = 0;
125                                }
126                        }
127
128                        if ( verbose) {
129                                System.out.println("Check for combining: " + maximum_value  + " 1 :" + PDPParameters.CUT_OFF_VALUE1);
130                                System.out.println("                     " + maximum_valuem + " 1M:" + PDPParameters.CUT_OFF_VALUE1M );
131                                System.out.println("                     " + maximum_values + " 1S:" + PDPParameters.CUT_OFF_VALUE1S);
132                        }
133
134                        if (maximum_value > PDPParameters.CUT_OFF_VALUE1) {
135                                /*
136                        avd=(domains.get(Si).avd+domains.get(Sj).avd)/2;
137                                 */
138                                if(verbose) System.out.println(" Criteria 1 matched");
139                                if(verbose) System.out.printf(" maximum_value = %f%n", maximum_value);
140                                if(verbose) System.out.printf(" Si = %d Sj = %d %n", Si, Sj);
141                                domains = combine(domains,Si, Sj, maximum_value);
142                                maximum_value = PDPParameters.CUT_OFF_VALUE1-.1;
143                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
144                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
145                                /*
146                        domains.get(Si).avd=domcont(domains.get(Si));
147                        domains.get(Sj).avd=domcont(domains.get(Sj));
148                                 */
149                                if(verbose) System.out.println(String.format(" Listing the domains after combining..."));
150                                if(verbose) listdomains (domains);
151                        }
152                        else if (maximum_valuem > PDPParameters.CUT_OFF_VALUE1M) {
153                                /*
154                        avd=(domains[Sim].avd+domains[Sjm].avd)/2;
155                                 */
156                                if(verbose) System.out.println(" Criteria 2 matched");
157                                if(verbose) System.out.printf(" maximum_values = %f%n", maximum_valuem);
158                                if(verbose) System.out.printf(" Sim = %d Sjm = %d%n", Sim, Sjm);
159                                domains = combine(domains, Sim, Sjm, maximum_valuem);
160                                maximum_value =  PDPParameters.CUT_OFF_VALUE1-.1;
161                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
162                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
163                                /*
164                        domains[Sim].avd=domcont(domains[Sim]);
165                        domains[Sjm].avd=domcont(domains[Sjm]);
166                                 */
167                                if(verbose) System.out.println(String.format(" Listing the domains after combining..."));
168                                if(verbose) listdomains (domains);
169                        }
170                        else if (maximum_values > PDPParameters.CUT_OFF_VALUE1S) {
171                                /*
172                        avd=(domains[Sis].avd+domains[Sjs].avd)/2;
173                                 */
174                                if(verbose) System.out.println(" Criteria 3 matched");
175                                if(verbose) System.out.printf(" maximum_values = %f%n", maximum_values);
176                                if(verbose) System.out.printf(" Sis = %d Sjs = %d%n", Sis, Sjs);
177                                domains = combine(domains, Sis, Sjs, maximum_values);
178                                maximum_value = PDPParameters.CUT_OFF_VALUE1-.1;
179                                maximum_values = PDPParameters.CUT_OFF_VALUE1S-.1;
180                                maximum_valuem = PDPParameters.CUT_OFF_VALUE1M-.1;
181                                /*
182                        domains[Sis].avd=domcont(domains[Sis]);
183                        domains[Sjs].avd=domcont(domains[Sjs]);
184                                 */
185                                if(verbose) System.out.println(" Listing the domains after combining...");
186                                if(verbose) listdomains(domains);
187                        }
188                        else {
189                                if(verbose) System.out.printf(" Maximum value is less than cut off value. (max:%f)%n", maximum_value);
190                                maximum_value = -1.0;
191                                maximum_values = -1.0;
192                                maximum_valuem = -1.0;
193
194                        }
195                } while ( maximum_value > 0.0||maximum_values>0.0||maximum_valuem>0.0);
196
197                if(verbose) System.out.println(String.format(" The domains are:"));
198                if(verbose) listdomains(domains);
199                return domains;
200        }
201
202
203
204        private static long getTotalContacts(List<Domain> domains,
205                        PDPDistanceMatrix pdpDistMatrix, Domain i, Domain j) {
206                long total_contacts=0;
207
208
209
210
211                for(int k=0;k<i.nseg;k++) {
212                        for(int l=0;l<j.nseg;l++) {
213                                long contacts = calc_S(j.getSegmentAtPos(l).getFrom(),
214                                                j.getSegmentAtPos(l).getTo(),
215                                                i.getSegmentAtPos(k).getFrom(),
216                                                i.getSegmentAtPos(k).getTo(),
217                                                pdpDistMatrix);
218                                total_contacts += contacts;
219                        }
220                }
221                return total_contacts;
222        }
223
224
225
226        private static List<Domain> combine(List<Domain> domains,int Si, int Sj, double maximum_value) {
227
228                if ( verbose)
229                        System.out.println("  +++  combining domains " + Si + " " + Sj);
230
231                List<Domain> newdoms = new ArrayList<>();
232
233                //int ndom = domains.size();
234                for(int i=0;i<domains.get(Sj).nseg;i++) {
235                        domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setFrom(domains.get(Sj).getSegmentAtPos(i).getFrom());
236                        domains.get(Si).getSegmentAtPos(domains.get(Si).nseg).setTo(domains.get(Sj).getSegmentAtPos(i).getTo());
237                        domains.get(Si).nseg++;
238                }
239                domains.get(Si).size+=domains.get(Sj).size;
240
241
242                for(int i=0;i<domains.get(ndom-1).nseg;i++) {
243                        domains.get(Sj).getSegmentAtPos(i).setFrom(domains.get(ndom-1).getSegmentAtPos(i).getFrom());
244                        domains.get(Sj).getSegmentAtPos(i).setTo(domains.get(ndom-1).getSegmentAtPos(i).getTo());
245
246                }
247                for ( int i =0; i < domains.size(); i++){
248                        if ( i == Sj)continue;
249                        newdoms.add(domains.get(i));
250                }
251                domains.get(Sj).size=domains.get(ndom-1).size;
252                domains.get(Sj).nseg=domains.get(ndom-1).nseg;
253
254                ndom--;
255                return newdoms;
256
257        }
258
259        private static long calc_S (int a1,int b1,int a2,int b2, PDPDistanceMatrix pdpDistMatrix)
260        {
261
262                long contacts = 0;
263
264                int[][] dist = pdpDistMatrix.getDist();
265
266                for(int i=a1;i<=b1;i++)
267                        for(int j=a2;j<=b2;j++)
268                                contacts+=dist[i][j];
269
270                return contacts;
271        }
272
273        private static final void listdomains(List<Domain> domains){
274
275                int i = -1;
276                for ( Domain dom : domains){
277                        i++;
278                        System.out.println("DOMAIN:" + i + " size:" + dom.size + " " +  dom.score);
279                        List<Segment> segments = dom.getSegments();
280
281                        for ( Segment s : segments){
282                                System.out.println("   Segment: " + s);
283
284                        }
285                }
286        }
287}