001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package demo; 022 023import java.io.InputStream; 024import java.util.LinkedHashMap; 025 026import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 027import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 028import org.biojava.nbio.core.sequence.MultipleSequenceAlignment; 029import org.biojava.nbio.core.sequence.ProteinSequence; 030import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 031import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 032import org.biojava.nbio.core.sequence.io.FastaReader; 033import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser; 034import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator; 035import org.biojava.nbio.phylo.DistanceMatrixCalculator; 036import org.biojava.nbio.phylo.DistanceTreeEvaluator; 037import org.biojava.nbio.phylo.ForesterWrapper; 038import org.biojava.nbio.phylo.TreeConstructor; 039import org.biojava.nbio.phylo.TreeConstructorType; 040import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; 041import org.forester.evoinference.matrix.distance.DistanceMatrix; 042import org.forester.phylogeny.Phylogeny; 043 044/** 045 * This demo contains the CookBook example to create a phylogenetic tree from a 046 * multiple sequence alignment (MSA). 047 * 048 * @author Scooter Willis 049 * @author Aleix Lafita 050 * 051 */ 052public class DemoDistanceTree { 053 054 public static void main(String[] args) throws Exception { 055 056 // 0. This is just to load an example MSA from a FASTA file 057 InputStream inStream = TreeConstructor.class 058 .getResourceAsStream("/PF00104_small.fasta"); 059 060 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = 061 new FastaReader<ProteinSequence, AminoAcidCompound>( 062 inStream, 063 new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), 064 new ProteinSequenceCreator(AminoAcidCompoundSet 065 .getAminoAcidCompoundSet())); 066 067 LinkedHashMap<String, ProteinSequence> proteinSequences = 068 fastaReader.process(); 069 070 inStream.close(); 071 072 MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa = 073 new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>(); 074 075 for (ProteinSequence proteinSequence : proteinSequences.values()) { 076 msa.addAlignedSequence(proteinSequence); 077 } 078 079 long readT = System.currentTimeMillis(); 080 081 // 1. Calculate the evolutionary distance matrix (can take long) 082 SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper 083 .getBlosum62(); 084 DistanceMatrix DM = DistanceMatrixCalculator 085 .dissimilarityScore(msa, M); 086 087 // 2. Construct a distance tree using the NJ algorithm 088 Phylogeny phylo = TreeConstructor.distanceTree( 089 (BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ); 090 091 long treeT = System.currentTimeMillis(); 092 String newick = ForesterWrapper.getNewickString(phylo, true); 093 System.out.println(newick); 094 System.out.println("Tree Construction: " + (treeT - readT) + " ms."); 095 096 // 3. Evaluate the goodness of fit of the tree 097 double cv = DistanceTreeEvaluator.evaluate(phylo, DM); 098 System.out.println("CV of the tree: " + (int) (cv * 100) + " %"); 099 100 } 101}