/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojavax.bio.phylo.io.phylip;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;

import org.biojava.bio.BioException;
import org.biojava.bio.alignment.Alignment;
import org.biojava.bio.alignment.FlexibleAlignment;
import org.biojava.bio.alignment.SimpleAlignmentElement;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.LocationTools;
import org.biojava.bio.symbol.SymbolList;

/**
 * Builds a PHYLIP file by listening to events.
 * <p>
 * Sequence text is accumulated per sequence name between
 * {@link #startFile()} and {@link #endFile()}. When the file ends, the
 * accumulated data is checked against the counts supplied through
 * {@link #setSequenceCount(int)} and {@link #setSitesCount(int)} and is
 * then turned into an {@link Alignment}, available from
 * {@link #getAlignment()}.
 *
 * @author Richard Holland
 * @author Tobias Thierer
 * @author Jim Balhoff
 * @since 1.6
 */
public class PHYLIPFileBuilder implements PHYLIPFileListener {

    /** Maps sequence name (String) to its accumulated text (StringBuffer), in insertion order. */
    private LinkedHashMap sequences;
    /** Number of sequences announced via {@link #setSequenceCount(int)}. */
    private int sequenceCount;
    /** Number of sites per sequence announced via {@link #setSitesCount(int)}. */
    private int sitesCount;
    /** Name of the sequence that {@link #receiveSequence(String)} currently appends to. */
    private String currentSequenceName;
    /** Alignment produced by the most recent successful {@link #endFile()}. */
    private Alignment alignment;

    /**
     * Starts a new file by discarding any previously accumulated sequence text.
     */
    public void startFile() {
        this.sequences = new LinkedHashMap();
    }

    /**
     * Finishes the file: verifies the accumulated sequences against the
     * announced counts and then builds the alignment.
     *
     * @throws ParseException if the number of sequences or the length of any
     *         sequence does not match the announced counts, or if the
     *         alignment cannot be built
     */
    public void endFile() throws ParseException {
        this.verifySequenceAndSitesCount();
        this.buildAlignment();
    }

    /**
     * Records the number of sequences the file is expected to contain.
     *
     * @param count the expected number of sequences
     */
    public void setSequenceCount(int count) {
        this.sequenceCount = count;
    }

    /**
     * Records the number of sites each sequence is expected to contain.
     *
     * @param count the expected number of sites per sequence
     */
    public void setSitesCount(int count) {
        this.sitesCount = count;
    }

    /**
     * Selects the sequence that subsequent {@link #receiveSequence(String)}
     * calls append to, creating an empty buffer for it the first time the
     * name is seen.
     *
     * @param name the name of the sequence to make current
     */
    public void setCurrentSequenceName(String name) {
        if (!this.sequences.containsKey(name)) {
            this.sequences.put(name, new StringBuffer());
        }
        this.currentSequenceName = name;
    }

    /**
     * Appends a chunk of sequence text to the current sequence.
     *
     * @param sequence the text to append
     * @throws IllegalStateException if no buffer exists for the current
     *         sequence name, i.e. {@link #setCurrentSequenceName(String)} has
     *         not been called since the last {@link #startFile()}
     */
    public void receiveSequence(String sequence) {
        StringBuffer buffer = (StringBuffer) this.sequences.get(this.currentSequenceName);
        if (buffer == null) {
            // Fail with a descriptive error instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "No current sequence: setCurrentSequenceName must be called before receiveSequence.");
        }
        buffer.append(sequence);
    }

    /**
     * Returns the alignment built by the last successful {@link #endFile()}.
     *
     * @return the alignment, or <code>null</code> if none has been built yet
     */
    public Alignment getAlignment() {
        return this.alignment;
    }

    /**
     * Converts the accumulated sequences into a {@link FlexibleAlignment} and
     * stores it for {@link #getAlignment()}.
     *
     * @throws ParseException if a sequence contains an illegal symbol, or if
     *         the sequences or the alignment cannot be created; the original
     *         exception is attached as the cause
     */
    private void buildAlignment() throws ParseException {
        List importedSequences;
        try {
            importedSequences = this.createSequences();
        } catch (IllegalSymbolException e) {
            throw parseFailure("Illegal symbol in sequence: " + e, e);
        } catch (BioException e) {
            throw parseFailure("Could not create sequences: " + e, e);
        }
        FlexibleAlignment newAlignment;
        try {
            newAlignment = new FlexibleAlignment(importedSequences);
        } catch (BioException e) {
            throw parseFailure("Could not construct alignment object: " + e, e);
        }
        this.alignment = newAlignment;
    }

    /**
     * Creates a {@link ParseException} carrying the given message and, where
     * possible, the given cause so that the original stack trace is kept.
     *
     * @param message the detail message
     * @param cause the exception that triggered the failure
     * @return the exception to throw
     */
    private static ParseException parseFailure(String message, Throwable cause) {
        ParseException failure = new ParseException(message);
        try {
            failure.initCause(cause);
        } catch (IllegalStateException ignored) {
            // initCause refuses if the constructor already fixed a cause; the
            // message (which embeds the cause's toString) is still reported.
        }
        return failure;
    }

    /**
     * Turns every accumulated sequence into a {@link SimpleAlignmentElement}
     * spanning sites <code>0</code> to <code>sitesCount - 1</code>.
     * <p>
     * The alphabet is decided once, from the first sequence only: if it
     * parses as gapped DNA, every sequence is created as gapped DNA;
     * otherwise every sequence is created as gapped protein.
     *
     * @return the list of alignment elements, in the order the sequence names
     *         were first seen
     * @throws IllegalSymbolException if a sequence contains a symbol that is
     *         not valid in the chosen alphabet
     * @throws BioException if a sequence or alignment element cannot be created
     */
    private List createSequences() throws IllegalSymbolException, BioException {
        List importedSequences = new ArrayList();
        boolean checkedType = false;
        boolean isDNA = true;
        Location loc = LocationTools.makeLocation(0, this.sitesCount - 1);
        for (Iterator i = this.sequences.entrySet().iterator(); i.hasNext();) {
            Entry sequenceEntry = (Entry) i.next();
            String name = (String) sequenceEntry.getKey();
            String sequence = ((StringBuffer) sequenceEntry.getValue()).toString();
            SymbolList symbolList = null;
            if (!checkedType) {
                try {
                    // Keep the probe result so the first sequence is parsed only once.
                    symbolList = DNATools.createGappedDNASequence(sequence, name);
                } catch (IllegalSymbolException e) {
                    // Not an error: a non-DNA first sequence selects the protein alphabet.
                    isDNA = false;
                }
                checkedType = true;
            }
            if (symbolList == null) {
                if (isDNA) {
                    // make DNA sequences
                    symbolList = DNATools.createGappedDNASequence(sequence, name);
                } else {
                    // make protein sequences
                    symbolList = ProteinTools.createGappedProteinSequence(sequence, name);
                }
            }
            importedSequences.add(new SimpleAlignmentElement(name, symbolList, loc));
        }
        return importedSequences;
    }

    /**
     * Checks the accumulated data against the announced header counts.
     *
     * @throws ParseException if the number of distinct sequence names differs
     *         from the announced sequence count, or if any sequence's length
     *         differs from the announced sites count
     */
    private void verifySequenceAndSitesCount() throws ParseException {
        if (this.sequences.size() != this.sequenceCount) {
            throw new ParseException("Number of sequences does not match header.");
        }
        for (Iterator i = this.sequences.values().iterator(); i.hasNext();) {
            StringBuffer currentSequence = (StringBuffer) i.next();
            if (currentSequence.length() != this.sitesCount) {
                throw new ParseException("Number of sites does not match header.");
            }
        }
    }
}