001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package demo;
022
023import java.io.InputStream;
024import java.util.LinkedHashMap;
025import java.util.Map;
026
027import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
029import org.biojava.nbio.core.sequence.MultipleSequenceAlignment;
030import org.biojava.nbio.core.sequence.ProteinSequence;
031import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
032import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
033import org.biojava.nbio.core.sequence.io.FastaReader;
034import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
035import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
036import org.biojava.nbio.phylo.DistanceMatrixCalculator;
037import org.biojava.nbio.phylo.DistanceTreeEvaluator;
038import org.biojava.nbio.phylo.ForesterWrapper;
039import org.biojava.nbio.phylo.TreeConstructor;
040import org.biojava.nbio.phylo.TreeConstructorType;
041import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
042import org.forester.evoinference.matrix.distance.DistanceMatrix;
043import org.forester.phylogeny.Phylogeny;
044
045/**
046 * This demo contains the CookBook example to create a phylogenetic tree from a
047 * multiple sequence alignment (MSA).
048 *
049 * @author Scooter Willis
050 * @author Aleix Lafita
051 *
052 */
053public class DemoDistanceTree {
054
055        public static void main(String[] args) throws Exception {
056
057                // 0. This is just to load an example MSA from a FASTA file
058                InputStream inStream = TreeConstructor.class
059                                .getResourceAsStream("/PF00104_small.fasta");
060
061                FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
062                                new FastaReader<>(
063                                inStream,
064                                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
065                                new ProteinSequenceCreator(AminoAcidCompoundSet
066                                                .getAminoAcidCompoundSet()));
067
068                Map<String, ProteinSequence> proteinSequences =
069                                fastaReader.process();
070
071                inStream.close();
072
073                MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
074                                new MultipleSequenceAlignment<>();
075
076                for (ProteinSequence proteinSequence : proteinSequences.values()) {
077                        msa.addAlignedSequence(proteinSequence);
078                }
079
080                long readT = System.currentTimeMillis();
081
082                // 1. Calculate the evolutionary distance matrix (can take long)
083                SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper
084                                .getBlosum62();
085                DistanceMatrix DM = DistanceMatrixCalculator
086                                .dissimilarityScore(msa, M);
087
088                // 2. Construct a distance tree using the NJ algorithm
089                Phylogeny phylo = TreeConstructor.distanceTree(
090                                (BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ);
091
092                long treeT = System.currentTimeMillis();
093                String newick = ForesterWrapper.getNewickString(phylo, true);
094                System.out.println(newick);
095                System.out.println("Tree Construction: " + (treeT - readT) + " ms.");
096
097                // 3. Evaluate the goodness of fit of the tree
098                double cv = DistanceTreeEvaluator.evaluate(phylo, DM);
099                System.out.println("CV of the tree: " + (int) (cv * 100) + " %");
100
101        }
102}