001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phylo;
022
023import org.forester.evoinference.matrix.distance.DistanceMatrix;
024import org.forester.phylogeny.Phylogeny;
025import org.forester.phylogeny.PhylogenyNode;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029import java.util.HashMap;
030import java.util.HashSet;
031import java.util.List;
032import java.util.Set;
033
034/**
035 * Check the accuracy of a Distance Tree by least squares error (LSE) of the
036 * Tree branch lengths and the original Distance Matrix.
037 *
038 * @author Scooter Willis
039 * @author Aleix Lafita
040 *
041 */
042public class DistanceTreeEvaluator {
043
044        private static final Logger logger = LoggerFactory
045                        .getLogger(DistanceTreeEvaluator.class);
046
047        /** Prevent instantiation */
048        private DistanceTreeEvaluator() {
049        }
050
051        /**
052         * Evaluate the goodness of fit of a given tree to the original distance
053         * matrix. The returned value is the coefficient of variation, i.e. the
054         * square root of the LS error normalized by the mean.
055         * <p>
056         * This measure can also give an estimate of the quality of the distance
057         * matrix, because a bad fit may mean that the distance is non-additive.
058         *
059         * @param tree
060         *            Phylogenetic Distance Tree to evaluate
061         * @param matrix
062         *            Distance Matrix with the original distances
063         * @return the square root of the average tree LS error normalized by the
064         *         average tree distance (coefficient of variation, CV).
065         */
066        public static double evaluate(Phylogeny tree, DistanceMatrix matrix) {
067                int numSequences = matrix.getSize();
068                List<PhylogenyNode> externalNodes = tree.getExternalNodes();
069                HashMap<String, PhylogenyNode> externalNodesHashMap = new HashMap<String, PhylogenyNode>();
070                Set<PhylogenyNode> path = new HashSet<PhylogenyNode>();
071
072                for (PhylogenyNode node : externalNodes) {
073                        externalNodesHashMap.put(node.getName(), node);
074                }
075                int count = 0;
076                double averageMatrixDistance = 0.0;
077                double averageTreeDistance = 0.0;
078                double averageTreeErrorDistance = 0.0;
079                for (int row = 0; row < numSequences - 1; row++) {
080                        String nodeName1 = matrix.getIdentifier(row);
081                        PhylogenyNode node1 = externalNodesHashMap.get(nodeName1);
082                        markPathToRoot(node1, path);
083                        for (int col = row + 1; col < numSequences; col++) {
084                                count++;
085                                String nodeName2 = matrix.getIdentifier(col);
086                                PhylogenyNode node2 = externalNodesHashMap.get(nodeName2);
087                                double distance = matrix.getValue(col, row);
088                                averageMatrixDistance = averageMatrixDistance + distance;
089                                PhylogenyNode commonParent = findCommonParent(node2, path);
090                                if (commonParent != null) {
091                                        double treeDistance = getNodeDistance(commonParent, node1)
092                                                        + getNodeDistance(commonParent, node2);
093
094                                        averageTreeDistance += treeDistance;
095                                        averageTreeErrorDistance += (distance - treeDistance)
096                                                        * (distance - treeDistance);
097                                        logger.info("{} {} Distance: {}Tree: {} difference: {}",
098                                                        nodeName1, nodeName2, distance, treeDistance,
099                                                        Math.abs(distance - treeDistance));
100                                } else {
101                                        logger.warn("Unable to find common parent with {} {}",
102                                                        node1, node2);
103                                }
104                        }
105                        path.clear();
106                }
107                averageMatrixDistance /= count;
108                averageTreeDistance /= count;
109                averageTreeErrorDistance /= count;
110
111                logger.info("Average matrix distance: {}", averageMatrixDistance);
112                logger.info("Average tree distance: {}", averageTreeDistance);
113                logger.info("Average LS error: {}", averageTreeErrorDistance);
114
115                return Math.sqrt(averageTreeErrorDistance) / averageMatrixDistance;
116        }
117
118        private static double getNodeDistance(PhylogenyNode parentNode,
119                        PhylogenyNode childNode) {
120                double distance = 0.0;
121                while (childNode != parentNode) {
122                        distance = distance + childNode.getDistanceToParent();
123                        childNode = childNode.getParent();
124                }
125
126                return distance;
127        }
128
129        private static PhylogenyNode findCommonParent(PhylogenyNode node,
130                        Set<PhylogenyNode> path) {
131                while (!path.contains(node)) {
132                        node = node.getParent();
133                }
134                return node;
135        }
136
137        private static void markPathToRoot(PhylogenyNode node,
138                        Set<PhylogenyNode> path) {
139                path.add(node);
140                while (!node.isRoot()) {
141                        node = node.getParent();
142                        path.add(node);
143                }
144        }
145}