/**
 * Multiple-hit correction methods for phylogeny inference.
 *
 * <p>Each method compares two aligned nucleotide sequences site by site and
 * converts the observed proportion of differing sites into a distance using
 * the formula of the named substitution model. Space characters are removed
 * from both sequences before comparison, and bases are compared
 * case-insensitively.
 *
 * @author Bohyun Lee
 */
public class MultipleHitCorrection {

    /**
     * Calculates the distance between two sequences (pairwise comparison)
     * based on the Jukes-Cantor model: {@code d = -3/4 * ln(1 - 4/3 * p)},
     * where {@code p} is the proportion of sites at which the sequences differ.
     *
     * @param taxa1
     *            first sequence
     * @param taxa2
     *            second sequence
     * @return the Jukes-Cantor distance. If the sequences differ in length
     *         (after spaces are removed) an error message is printed to
     *         standard output and {@code 0.0} is returned. If both sequences
     *         are empty the result is {@code NaN}. If the logarithm's argument
     *         is not positive (roughly, {@code p} at or above 0.75) the result
     *         is {@code Infinity} or {@code NaN}.
     * @throws NullPointerException
     *             if either sequence is {@code null}
     */
    public static double JukesCantor(String taxa1, String taxa2) {
        String first = normalize(taxa1);
        String second = normalize(taxa2);

        int length = first.length();
        if (length != second.length()) {
            // Historical contract: report on stdout and return 0.0 rather than throw.
            System.out.println("Error: Sequence Length dose not match!\n");
            return 0.0;
        }

        // Count the sites at which the two sequences carry different bases.
        int mismatches = 0;
        for (int i = 0; i < length; i++) {
            if (first.charAt(i) != second.charAt(i)) {
                mismatches++;
            }
        }

        // Proportion of mismatching sites; 0/0 (two empty sequences) yields NaN.
        double p = (double) mismatches / length;

        // Adding 0.0 only changes one value: it turns the -0.0 produced for
        // identical sequences (-0.75 * ln(1)) into a plain 0.0.
        return -0.75 * Math.log(1.0 - (4.0 / 3.0) * p) + 0.0;
    }

    /**
     * Calculates the distance between two sequences (pairwise comparison)
     * based on Kimura's two-parameter model:
     * {@code d = 1/2 * ln(1 / (1 - 2p - q)) + 1/4 * ln(1 / (1 - 2q))}, where
     * {@code p} is the proportion of sites showing a transition
     * (A&lt;-&gt;G or C&lt;-&gt;T) and {@code q} is the proportion of sites
     * showing any other difference (counted as transversions).
     *
     * @param taxa1
     *            first sequence
     * @param taxa2
     *            second sequence
     * @return the Kimura two-parameter distance. If the sequences differ in
     *         length (after spaces are removed) an error message is printed to
     *         standard output and {@code 0.0} is returned. If both sequences
     *         are empty the result is {@code NaN}. If either logarithm's
     *         argument is not positive and finite the result is
     *         {@code Infinity} or {@code NaN}.
     * @throws NullPointerException
     *             if either sequence is {@code null}
     */
    public static double KimuraTwoParameter(String taxa1, String taxa2) {
        String first = normalize(taxa1);
        String second = normalize(taxa2);

        int length = first.length();
        if (length != second.length()) {
            // Historical contract: report on stdout and return 0.0 rather than throw.
            System.out.println("Error: Sequence Length dose not match!\n");
            return 0.0;
        }

        int transitions = 0;
        int transversions = 0;
        for (int i = 0; i < length; i++) {
            char a = first.charAt(i);
            char b = second.charAt(i);
            if (a == b) {
                continue;
            }
            if (isTransition(a, b)) {
                transitions++;
            } else {
                // Every differing pair that is not A<->G or C<->T counts here.
                transversions++;
            }
        }

        // Proportions of transition and transversion sites; 0/0 yields NaN.
        double p = (double) transitions / length;
        double q = (double) transversions / length;

        return 0.5 * Math.log(1.0 / (1.0 - 2.0 * p - q))
                + 0.25 * Math.log(1.0 / (1.0 - 2.0 * q));
    }

    /**
     * Removes space characters and upper-cases every remaining character.
     * Conversion is per character, so the result has exactly one character
     * for each non-space character of the input.
     */
    private static String normalize(String sequence) {
        StringBuilder cleaned = new StringBuilder(sequence.length());
        for (int i = 0; i < sequence.length(); i++) {
            char c = sequence.charAt(i);
            if (c != ' ') {
                cleaned.append(Character.toUpperCase(c));
            }
        }
        return cleaned.toString();
    }

    /** Tells whether two upper-case bases form an A/G or C/T pair, in either order. */
    private static boolean isTransition(char a, char b) {
        return (a == 'A' && b == 'G') || (a == 'G' && b == 'A')
                || (a == 'T' && b == 'C') || (a == 'C' && b == 'T');
    }

}