001package org.biojavax.bio.phylo;
002
003 /*
004  *   MultipleHitCorrection methods for phylogeny inference
005  *
006  *   @author Bohyun Lee
007  */
public class MultipleHitCorrection {

    /**
     * Calculates the evolutionary distance between two aligned sequences
     * (pairwise comparison) using the Jukes-Cantor model:
     * {@code d = -3/4 * ln(1 - 4/3 * p)}, where {@code p} is the proportion
     * of sites at which the two sequences differ.
     *
     * <p>Space characters are removed from both sequences before they are
     * compared, and bases are compared case-insensitively, so {@code "acgt"}
     * and {@code "ACGT"} are treated as identical.
     *
     * <p>Edge cases:
     * <ul>
     *   <li>If the sequences differ in length (after removing spaces), an
     *       error message is printed to standard output and {@code 0.0} is
     *       returned.</li>
     *   <li>If both sequences are empty, {@code p} is {@code 0/0} and the
     *       result is {@code NaN}.</li>
     *   <li>If about three quarters or more of the sites differ, the
     *       logarithm's argument is no longer positive and the result may be
     *       infinite or {@code NaN}.</li>
     * </ul>
     *
     * @param taxa1 first sequence
     * @param taxa2 second sequence
     * @return the estimated Jukes-Cantor distance
     * @throws NullPointerException if either sequence is {@code null}
     */
    public static double JukesCantor(String taxa1, String taxa2) {

        String first = stripSpaces(taxa1);
        String second = stripSpaces(taxa2);

        int length = first.length();
        if (length != second.length()) {
            return reportLengthMismatch();
        }

        // count the sites at which the two sequences carry a different base
        int mismatches = 0;
        for (int i = 0; i < length; i++) {
            if (baseAt(first, i) != baseAt(second, i)) {
                mismatches++;
            }
        }

        // proportion of mismatching sites, used as the probability that the
        // two taxa differ at any given site
        double p = mismatches / (double) length;

        // For identical sequences the formula yields -0.75 * 0.0 == -0.0;
        // adding +0.0 normalises negative zero to positive zero and leaves
        // every other value (including NaN and infinity) untouched.
        return (-0.75 * Math.log(1.0 - (4.0 / 3.0) * p)) + 0.0;
    }

    /**
     * Calculates the evolutionary distance between two aligned sequences
     * (pairwise comparison) using Kimura's two-parameter model:
     * {@code d = 1/2 * ln(1 / (1 - 2p - q)) + 1/4 * ln(1 / (1 - 2q))}, where
     * {@code p} is the proportion of sites showing a transition
     * (A&lt;-&gt;G or C&lt;-&gt;T) and {@code q} is the proportion of sites
     * showing any other difference (counted as a transversion).
     *
     * <p>Space characters are removed from both sequences before they are
     * compared, and bases are compared case-insensitively, so lower-case
     * sequences are classified the same way as upper-case ones.
     *
     * <p>Edge cases:
     * <ul>
     *   <li>If the sequences differ in length (after removing spaces), an
     *       error message is printed to standard output and {@code 0.0} is
     *       returned.</li>
     *   <li>If both sequences are empty, the proportions are {@code 0/0} and
     *       the result is {@code NaN}.</li>
     *   <li>If {@code 1 - 2p - q} or {@code 1 - 2q} is not positive, the
     *       result may be infinite or {@code NaN}.</li>
     * </ul>
     *
     * @param taxa1 first sequence
     * @param taxa2 second sequence
     * @return the estimated Kimura two-parameter distance
     * @throws NullPointerException if either sequence is {@code null}
     */
    public static double KimuraTwoParameter(String taxa1, String taxa2) {

        String first = stripSpaces(taxa1);
        String second = stripSpaces(taxa2);

        int length = first.length();
        if (length != second.length()) {
            return reportLengthMismatch();
        }

        int transitions = 0;
        int transversions = 0;

        for (int i = 0; i < length; i++) {
            char a = baseAt(first, i);
            char b = baseAt(second, i);

            if (a == b) {
                continue;
            }

            if (isTransition(a, b)) {
                transitions++;
            } else {
                // every difference that is not a transition is counted as a
                // transversion
                transversions++;
            }
        }

        // proportions of transitional (p) and transversional (q) differences
        double p = transitions / (double) length;
        double q = transversions / (double) length;

        return (0.5 * Math.log(1.0 / (1.0 - 2.0 * p - q)))
                + (0.25 * Math.log(1.0 / (1.0 - 2.0 * q)));
    }

    /** Removes every space character from the given sequence. */
    private static String stripSpaces(String sequence) {
        return sequence.replace(" ", "");
    }

    /** Returns the base at the given index, folded to upper case. */
    private static char baseAt(String sequence, int index) {
        return Character.toUpperCase(sequence.charAt(index));
    }

    /** Tells whether two differing upper-case bases form a transition (A/G or C/T). */
    private static boolean isTransition(char a, char b) {
        return (a == 'A' && b == 'G') || (a == 'G' && b == 'A')
                || (a == 'T' && b == 'C') || (a == 'C' && b == 'T');
    }

    /** Reports a sequence length mismatch on standard output and returns the fallback distance 0.0. */
    private static double reportLengthMismatch() {
        System.out.println("Error: Sequence Length dose not match!\n");
        return 0.0;
    }

}
108