001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package demo;
022
023import java.io.InputStream;
024import java.util.LinkedHashMap;
025
026import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
027import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
028import org.biojava.nbio.core.sequence.MultipleSequenceAlignment;
029import org.biojava.nbio.core.sequence.ProteinSequence;
030import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
031import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
032import org.biojava.nbio.core.sequence.io.FastaReader;
033import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
034import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
035import org.biojava.nbio.phylo.DistanceMatrixCalculator;
036import org.biojava.nbio.phylo.DistanceTreeEvaluator;
037import org.biojava.nbio.phylo.ForesterWrapper;
038import org.biojava.nbio.phylo.TreeConstructor;
039import org.biojava.nbio.phylo.TreeConstructorType;
040import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
041import org.forester.evoinference.matrix.distance.DistanceMatrix;
042import org.forester.phylogeny.Phylogeny;
043
044/**
045 * This demo contains the CookBook example to create a phylogenetic tree from a
046 * multiple sequence alignment (MSA).
047 *
048 * @author Scooter Willis
049 * @author Aleix Lafita
050 *
051 */
052public class DemoDistanceTree {
053
054        public static void main(String[] args) throws Exception {
055
056                // 0. This is just to load an example MSA from a FASTA file
057                InputStream inStream = TreeConstructor.class
058                                .getResourceAsStream("/PF00104_small.fasta");
059
060                FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
061                                new FastaReader<ProteinSequence, AminoAcidCompound>(
062                                inStream,
063                                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
064                                new ProteinSequenceCreator(AminoAcidCompoundSet
065                                                .getAminoAcidCompoundSet()));
066
067                LinkedHashMap<String, ProteinSequence> proteinSequences =
068                                fastaReader.process();
069
070                inStream.close();
071
072                MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
073                                new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();
074
075                for (ProteinSequence proteinSequence : proteinSequences.values()) {
076                        msa.addAlignedSequence(proteinSequence);
077                }
078
079                long readT = System.currentTimeMillis();
080
081                // 1. Calculate the evolutionary distance matrix (can take long)
082                SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper
083                                .getBlosum62();
084                DistanceMatrix DM = DistanceMatrixCalculator
085                                .dissimilarityScore(msa, M);
086
087                // 2. Construct a distance tree using the NJ algorithm
088                Phylogeny phylo = TreeConstructor.distanceTree(
089                                (BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ);
090
091                long treeT = System.currentTimeMillis();
092                String newick = ForesterWrapper.getNewickString(phylo, true);
093                System.out.println(newick);
094                System.out.println("Tree Construction: " + (treeT - readT) + " ms.");
095
096                // 3. Evaluate the goodness of fit of the tree
097                double cv = DistanceTreeEvaluator.evaluate(phylo, DM);
098                System.out.println("CV of the tree: " + (int) (cv * 100) + " %");
099
100        }
101}