/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.phylo;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.biojava.nbio.core.sequence.MultipleSequenceAlignment;
import org.biojava.nbio.core.sequence.io.FastaWriter;
import org.biojava.nbio.core.sequence.io.template.FastaHeaderFormatInterface;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.msa.Msa;
import org.forester.phylogeny.Phylogeny;

/**
 * Static helper methods that bridge BioJava and forester, mainly by
 * converting data structures from one library into the other.
 *
 * @author Aleix Lafita
 * @since 4.1.1
 *
 */
public class ForesterWrapper {

    /** Utility class: not meant to be instantiated. */
    private ForesterWrapper() {
    }

    /**
     * Convert a BioJava {@link MultipleSequenceAlignment} into a forester
     * {@link Msa}. The alignment is serialized as FASTA text into an
     * in-memory buffer, and that text is then handed to the forester
     * {@link FastaParser}.
     * <p>
     * Each FASTA header is the string form of the sequence accession, as
     * returned by {@code sequence.getAccession().toString()}.
     *
     * @param msa
     *            BioJava MultipleSequenceAlignment
     * @return forester Msa object
     * @throws IOException
     *             if the conversion was not possible
     */
    public static <C extends Sequence<D>, D extends Compound> Msa convert(
            MultipleSequenceAlignment<C, D> msa) throws IOException {

        // Header formatter: identify every sequence by its accession only
        FastaHeaderFormatInterface<C, D> accessionHeader =
                new FastaHeaderFormatInterface<C, D>() {
                    @Override
                    public String getHeader(C sequence) {
                        return sequence.getAccession().toString();
                    }
                };

        // Write the BioJava alignment as FASTA into an in-memory buffer
        OutputStream buffer = new ByteArrayOutputStream();
        FastaWriter<C, D> writer = new FastaWriter<>(buffer,
                msa.getAlignedSequences(), accessionHeader);
        writer.process();

        // Let forester parse the FASTA text that was just produced
        return FastaParser.parseMsa(buffer.toString());
    }

    /**
     * Produce the Newick (New Hampshire) representation of a phylogenetic
     * tree, so that it can be exported to an external application.
     *
     * @param phylo
     *            Phylogeny phylogenetic tree
     * @param writeDistances
     *            write the branch lengths if true
     * @return the Newick String of the tree, as generated by the forester
     *         {@link PhylogenyWriter}
     * @throws IOException
     *             propagated from the forester {@link PhylogenyWriter}
     */
    public static String getNewickString(Phylogeny phylo,
            boolean writeDistances) throws IOException {

        return new PhylogenyWriter()
                .toNewHampshire(phylo, writeDistances)
                .toString();
    }

    /**
     * Helper function to copy a forester symmetrical DistanceMatrix into a
     * newly allocated matrix of the same size.
     * <p>
     * All identifiers are copied, and every value at a position
     * {@code (i, j)} with {@code i < j} is copied explicitly. This method
     * never writes the diagonal entries of the copy.
     *
     * @param distM
     *            forester symmetrical DistanceMatrix
     * @return identical copy of the forester symmetrical DistanceMatrix
     */
    public static BasicSymmetricalDistanceMatrix cloneDM(
            BasicSymmetricalDistanceMatrix distM) {

        final int size = distM.getSize();
        final BasicSymmetricalDistanceMatrix copy =
                new BasicSymmetricalDistanceMatrix(size);

        for (int row = 0; row < size; row++) {
            copy.setIdentifier(row, distM.getIdentifier(row));

            // Only the entries above the diagonal are transferred
            for (int col = row + 1; col < size; col++) {
                double distance = distM.getValue(row, col);
                copy.setValue(row, col, distance);
            }
        }
        return copy;
    }

}