001package org.biojavax.bio.phylo; 002 003import java.io.*; 004import java.lang.*; 005import java.util.*; 006import java.util.ArrayList; 007import java.util.List; 008 009import org.biojavax.bio.phylo.io.nexus.*; 010import org.jgrapht.*; 011import org.jgrapht.graph.*; 012 013 014/* 015 * Phylogeny reconstruction methods based on distance 016 * 017 * @author Bohyun Lee 018 */ 019 020public class DistanceBasedTreeMethod { 021 022 /* 023 * 024 * generating a tree graph that has been generated by UPGMA model 025 * 026 * @ param t 027 * Nexus TaxaBlock, contains number of taxa, taxa labels 028 * 029 * @ param ch 030 * Nexus CharactersBlock, contains sequence information 031 * 032 * @ returns the generated graph in JGraphT type 033 */ 034 035 public static WeightedGraph<String, DefaultWeightedEdge> Upgma(TaxaBlock t, CharactersBlock ch){ 036 037 String v1, v2, v3; 038 int index_x = 0, index_y = 0, p_index = 0; 039 WeightedGraph<String, DefaultWeightedEdge> jgrapht = new SimpleWeightedGraph<String, DefaultWeightedEdge>(DefaultWeightedEdge.class); 040 041 int NTax = t.getDimensionsNTax(); 042 List labels = t.getTaxLabels(); 043 044 String [] seq = new String[NTax]; 045 double [][] distance = new double[NTax][]; 046 047 for(int i = 0; i < NTax; i++){ 048 seq[i] = ""; 049 distance[i] = new double[NTax]; 050 } 051 052 for (Iterator i = labels.iterator(); i.hasNext(); ) { 053 String taxa = (String)i.next(); 054 List matrix = ch.getMatrixData(taxa); 055 056 for (Iterator j = matrix.iterator(); j.hasNext(); ) { 057 Object elem = j.next(); 058 059 if (elem instanceof Set) { 060 061 // This is a curly-braces {} enclosed 062 // set of values from the matrix. 063 Set data = (Set)elem; 064 065 } else if (elem instanceof List) { 066 067 // This is a round-braces () enclosed 068 // set of values from the matrix. 069 070 List data = (List)elem; 071 } else { 072 // Assume it's a string. 073 074 String data = elem.toString(); 075 076 if(data != null && data != " ") 077 seq[labels.indexOf(taxa)] += data; 078 } 079 } 080 } 081 082 // build initial distance matrix 083 for( int i = 0; i < NTax; i++){ 084 for(int j = 0; j < NTax; j++){ 085 if(i == j) 086 distance[i][j] = 0.0; 087 else 088 distance[i][j] = MultipleHitCorrection.JukesCantor(seq[i], seq[j]); 089 } 090 } 091 092 do{ 093 //find minimum distance pair 094 double min_d = distance[0][1]; 095 for( int i = 0; i < NTax; i++){ 096 for(int j = i + 1; j < NTax; j++){ 097 if( min_d >= distance[i][j]){ 098 min_d = distance[i][j]; 099 index_x = i; 100 index_y = j; 101 } 102 } 103 } 104 105 // build a sub-tree by using jgrapht 106 v1 = (String) labels.get(index_x); 107 v2 = "p" + p_index; 108 v3 = (String) labels.get(index_y); 109 110 jgrapht.addVertex(v1); 111 jgrapht.addVertex(v2); 112 jgrapht.addVertex(v3); 113 jgrapht.addEdge(v1,v2); 114 jgrapht.addEdge(v2,v3); 115 116 p_index++; 117 118 //System.out.println(jgrapht.toString()); 119 120 //collapse a min_distance pair and re-build distance matrix 121 for(int i = 0; i < NTax; i++){ 122 for(int j = i; j < NTax; j++){ 123 if(i == j){ 124 distance[i][j] = 0.0; 125 }else if(i == index_x && j == index_y){ 126 for(int k = j+1; k < NTax; k++){ 127 128 distance[i][j] = (distance[k][i] + distance[k][j])/2; 129 distance[j][i] = distance[i][j]; 130 labels.set(i, (Object) v2); 131 labels.set(j, labels.get(k)); 132 } 133 134 labels.set(NTax-1, (Object) null); 135 }else if(j == index_x){ 136 for(int k = j+1; k < NTax; k++){ 137 if(k == index_y){ 138 distance[i][j] = (distance[i][j] + distance[i][k])/2; 139 distance[j][i] = distance[i][j]; 140 labels.set(index_x, (Object) v2); 141 labels.set(index_y, (Object) null); 142 } 143 } 144 } 145 146 } 147 } 148 149 NTax--; 150 151 //iterate until tree is completed! 152 }while(NTax > 1); 153 154 return jgrapht; 155 } 156 157 158 /* 159 * 160 * generating a tree graph that has been generated by Neighbor-Joining model 161 * 162 * @ param t 163 * Nexus TaxaBlock, contains number of taxa, taxa labels 164 * 165 * @ param ch 166 * Nexus CharactersBlock, contains sequence information 167 * 168 * @ returns the generated graph in JGraphT type 169 */ 170 171 172 public static WeightedGraph<String, DefaultWeightedEdge> NeighborJoining(TaxaBlock t, CharactersBlock ch){ 173 174 String v1, v2, v3; 175 int index_x = 0, index_y = 0, p_index = 0; 176 WeightedGraph<String, DefaultWeightedEdge> jgrapht = new SimpleWeightedGraph<String, DefaultWeightedEdge>(DefaultWeightedEdge.class); 177 178 int NTax = t.getDimensionsNTax(); 179 List labels = t.getTaxLabels(); 180 181 String [] seq = new String[NTax]; 182 double []net_divergence = new double[NTax]; 183 double [][] raw_distance = new double[NTax][]; 184 double [][] distance = new double[NTax][]; 185 186 for(int i = 0; i < NTax; i++){ 187 seq[i] = ""; 188 raw_distance[i] = new double[NTax]; 189 distance[i] = new double[NTax]; 190 } 191 192 for (Iterator i = labels.iterator(); i.hasNext(); ) { 193 String taxa = (String)i.next(); 194 List matrix = ch.getMatrixData(taxa); 195 196 for (Iterator j = matrix.iterator(); j.hasNext(); ) { 197 Object elem = j.next(); 198 199 if (elem instanceof Set) { 200 // This is a curly-braces {} enclosed 201 // set of values from the matrix. 202 203 Set data = (Set)elem; 204 205 } else if (elem instanceof List) { 206 207 // This is a round-braces () enclosed 208 // set of values from the matrix. 209 210 List data = (List)elem; 211 212 } else { 213 214 // Assume it's a string. 215 String data = elem.toString(); 216 217 if(data != null && data != " ") 218 seq[labels.indexOf(taxa)] += data; 219 } 220 } 221 } 222 223 224 // build initial distance matrix 225 for( int i = 0; i < NTax; i++){ 226 for(int j = 0; j< NTax; j++){ 227 if(i == j) 228 raw_distance[i][j] = 0.0; 229 else 230 raw_distance[i][j] = MultipleHitCorrection.JukesCantor(seq[i], seq[j]); 231 232 net_divergence[i] =+ raw_distance[i][j]; 233 } 234 } 235 236 //iterate until tree is completed! 237 do{ 238 // calculate distance matrix from raw_distances & net divergence 239 for(int i = 0; i < NTax; i++){ 240 for(int j = 0; j < NTax; j++){ 241 if(i == j) 242 distance[i][j] = 0.0; 243 else 244 distance[i][j] = raw_distance[i][j] - ((net_divergence[i] + net_divergence[j])/2) ; 245 } 246 } 247 248 //find minimum distance pair 249 double min_d = distance[0][1]; 250 for( int i = 0; i < NTax; i++){ 251 for(int j = i + 1; j < NTax; j++){ 252 if( min_d >= distance[i][j]){ 253 min_d = distance[i][j]; 254 index_x = i; 255 index_y = j; 256 } 257 } 258 } 259 260 // build a sub-tree by using jgrapht 261 v1 = (String) labels.get(index_x); 262 v2 = "p" + p_index; 263 v3 = (String) labels.get(index_y); 264 265 jgrapht.addVertex(v1); 266 jgrapht.addVertex(v2); 267 jgrapht.addVertex(v3); 268 jgrapht.addEdge(v1,v2); 269 jgrapht.addEdge(v2,v3); 270 271 //adding weight to the edge 272 jgrapht.setEdgeWeight(jgrapht.getEdge(v1,v2), ((raw_distance[index_x][index_y]/2) + (net_divergence[index_x] - net_divergence[index_y])/(2*(NTax-2))) ); 273 jgrapht.setEdgeWeight(jgrapht.getEdge(v2,v3), raw_distance[index_x][index_y] - ((raw_distance[index_x][index_y]/2) + (net_divergence[index_x] - net_divergence[index_y])/(2*(NTax-2))) ); 274 275 p_index++; 276 277 //System.out.println(jgrapht.toString()); 278 279 //collapse a min_distance pair and re-build distance matrix 280 for(int i = 0; i < NTax; i++){ 281 for(int j = i; j < NTax; j++){ 282 if(i == j){ 283 distance[i][j] = 0.0; 284 }else if(i == index_x && j == index_y){ 285 for(int k = j+1; k < NTax; k++){ 286 raw_distance[i][j] = (raw_distance[k][i] + raw_distance[k][j] - raw_distance[index_x][index_y])/2; 287 raw_distance[j][i] = raw_distance[i][j]; 288 labels.set(i, (Object) v2); 289 labels.set(j, (Object) labels.get(k)); 290 } 291 labels.set(NTax-1, (Object) null); 292 }else if(j == index_x){ 293 for(int k = j+1; k < NTax; k++){ 294 if(k == index_y){ 295 raw_distance[i][j] = (raw_distance[i][j] + raw_distance[i][k] -raw_distance[index_x][index_y])/2; 296 raw_distance[j][i] = raw_distance[i][j]; 297 labels.set(index_x, (Object) v2); 298 labels.set(index_y, (Object) null); 299 } 300 } 301 } 302 303 } 304 } 305 306 NTax--; 307 308 //iterate until tree is completed! 309 }while(NTax > 1); 310 return jgrapht; 311 } 312} 313