001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.phylo; 022 023import org.forester.evoinference.matrix.distance.DistanceMatrix; 024import org.forester.phylogeny.Phylogeny; 025import org.forester.phylogeny.PhylogenyNode; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028 029import java.util.HashMap; 030import java.util.HashSet; 031import java.util.List; 032import java.util.Set; 033 034/** 035 * Check the accuracy of a Distance Tree by least squares error (LSE) of the 036 * Tree branch lengths and the original Distance Matrix. 037 * 038 * @author Scooter Willis 039 * @author Aleix Lafita 040 * 041 */ 042public class DistanceTreeEvaluator { 043 044 private static final Logger logger = LoggerFactory 045 .getLogger(DistanceTreeEvaluator.class); 046 047 /** Prevent instantiation */ 048 private DistanceTreeEvaluator() { 049 } 050 051 /** 052 * Evaluate the goodness of fit of a given tree to the original distance 053 * matrix. The returned value is the coefficient of variation, i.e. the 054 * square root of the LS error normalized by the mean. 055 * <p> 056 * This measure can also give an estimate of the quality of the distance 057 * matrix, because a bad fit may mean that the distance is non-additive. 058 * 059 * @param tree 060 * Phylogenetic Distance Tree to evaluate 061 * @param matrix 062 * Distance Matrix with the original distances 063 * @return the square root of the average tree LS error normalized by the 064 * average tree distance (coefficient of variation, CV). 065 */ 066 public static double evaluate(Phylogeny tree, DistanceMatrix matrix) { 067 int numSequences = matrix.getSize(); 068 List<PhylogenyNode> externalNodes = tree.getExternalNodes(); 069 HashMap<String, PhylogenyNode> externalNodesHashMap = new HashMap<String, PhylogenyNode>(); 070 Set<PhylogenyNode> path = new HashSet<PhylogenyNode>(); 071 072 for (PhylogenyNode node : externalNodes) { 073 externalNodesHashMap.put(node.getName(), node); 074 } 075 int count = 0; 076 double averageMatrixDistance = 0.0; 077 double averageTreeDistance = 0.0; 078 double averageTreeErrorDistance = 0.0; 079 for (int row = 0; row < numSequences - 1; row++) { 080 String nodeName1 = matrix.getIdentifier(row); 081 PhylogenyNode node1 = externalNodesHashMap.get(nodeName1); 082 markPathToRoot(node1, path); 083 for (int col = row + 1; col < numSequences; col++) { 084 count++; 085 String nodeName2 = matrix.getIdentifier(col); 086 PhylogenyNode node2 = externalNodesHashMap.get(nodeName2); 087 double distance = matrix.getValue(col, row); 088 averageMatrixDistance = averageMatrixDistance + distance; 089 PhylogenyNode commonParent = findCommonParent(node2, path); 090 if (commonParent != null) { 091 double treeDistance = getNodeDistance(commonParent, node1) 092 + getNodeDistance(commonParent, node2); 093 094 averageTreeDistance += treeDistance; 095 averageTreeErrorDistance += (distance - treeDistance) 096 * (distance - treeDistance); 097 logger.info("{} {} Distance: {}Tree: {} difference: {}", 098 nodeName1, nodeName2, distance, treeDistance, 099 Math.abs(distance - treeDistance)); 100 } else { 101 logger.warn("Unable to find common parent with {} {}", 102 node1, node2); 103 } 104 } 105 path.clear(); 106 } 107 averageMatrixDistance /= count; 108 averageTreeDistance /= count; 109 averageTreeErrorDistance /= count; 110 111 logger.info("Average matrix distance: {}", averageMatrixDistance); 112 logger.info("Average tree distance: {}", averageTreeDistance); 113 logger.info("Average LS error: {}", averageTreeErrorDistance); 114 115 return Math.sqrt(averageTreeErrorDistance) / averageMatrixDistance; 116 } 117 118 private static double getNodeDistance(PhylogenyNode parentNode, 119 PhylogenyNode childNode) { 120 double distance = 0.0; 121 while (childNode != parentNode) { 122 distance = distance + childNode.getDistanceToParent(); 123 childNode = childNode.getParent(); 124 } 125 126 return distance; 127 } 128 129 private static PhylogenyNode findCommonParent(PhylogenyNode node, 130 Set<PhylogenyNode> path) { 131 while (!path.contains(node)) { 132 node = node.getParent(); 133 } 134 return node; 135 } 136 137 private static void markPathToRoot(PhylogenyNode node, 138 Set<PhylogenyNode> path) { 139 path.add(node); 140 while (!node.isRoot()) { 141 node = node.getParent(); 142 path.add(node); 143 } 144 } 145}