001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package demo; 022 023import java.io.InputStream; 024import java.util.LinkedHashMap; 025import java.util.Map; 026 027import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 028import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 029import org.biojava.nbio.core.sequence.MultipleSequenceAlignment; 030import org.biojava.nbio.core.sequence.ProteinSequence; 031import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 032import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 033import org.biojava.nbio.core.sequence.io.FastaReader; 034import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser; 035import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator; 036import org.biojava.nbio.phylo.DistanceMatrixCalculator; 037import org.biojava.nbio.phylo.DistanceTreeEvaluator; 038import org.biojava.nbio.phylo.ForesterWrapper; 039import org.biojava.nbio.phylo.TreeConstructor; 040import org.biojava.nbio.phylo.TreeConstructorType; 041import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; 042import org.forester.evoinference.matrix.distance.DistanceMatrix; 043import org.forester.phylogeny.Phylogeny; 044 045/** 046 * This demo contains the CookBook example to create a phylogenetic tree from a 047 * multiple sequence alignment (MSA). 048 * 049 * @author Scooter Willis 050 * @author Aleix Lafita 051 * 052 */ 053public class DemoDistanceTree { 054 055 public static void main(String[] args) throws Exception { 056 057 // 0. This is just to load an example MSA from a FASTA file 058 InputStream inStream = TreeConstructor.class 059 .getResourceAsStream("/PF00104_small.fasta"); 060 061 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = 062 new FastaReader<>( 063 inStream, 064 new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), 065 new ProteinSequenceCreator(AminoAcidCompoundSet 066 .getAminoAcidCompoundSet())); 067 068 Map<String, ProteinSequence> proteinSequences = 069 fastaReader.process(); 070 071 inStream.close(); 072 073 MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa = 074 new MultipleSequenceAlignment<>(); 075 076 for (ProteinSequence proteinSequence : proteinSequences.values()) { 077 msa.addAlignedSequence(proteinSequence); 078 } 079 080 long readT = System.currentTimeMillis(); 081 082 // 1. Calculate the evolutionary distance matrix (can take long) 083 SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper 084 .getBlosum62(); 085 DistanceMatrix DM = DistanceMatrixCalculator 086 .dissimilarityScore(msa, M); 087 088 // 2. Construct a distance tree using the NJ algorithm 089 Phylogeny phylo = TreeConstructor.distanceTree( 090 (BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ); 091 092 long treeT = System.currentTimeMillis(); 093 String newick = ForesterWrapper.getNewickString(phylo, true); 094 System.out.println(newick); 095 System.out.println("Tree Construction: " + (treeT - readT) + " ms."); 096 097 // 3. Evaluate the goodness of fit of the tree 098 double cv = DistanceTreeEvaluator.evaluate(phylo, DM); 099 System.out.println("CV of the tree: " + (int) (cv * 100) + " %"); 100 101 } 102}