001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojavax.bio.phylo.io.phylip;
022
023import java.util.ArrayList;
024import java.util.Iterator;
025import java.util.LinkedHashMap;
026import java.util.List;
027import java.util.Map.Entry;
028
029import org.biojava.bio.BioException;
030import org.biojava.bio.alignment.Alignment;
031import org.biojava.bio.alignment.FlexibleAlignment;
032import org.biojava.bio.alignment.SimpleAlignmentElement;
033import org.biojava.bio.seq.DNATools;
034import org.biojava.bio.seq.ProteinTools;
035import org.biojava.bio.seq.io.ParseException;
036import org.biojava.bio.symbol.IllegalSymbolException;
037import org.biojava.bio.symbol.Location;
038import org.biojava.bio.symbol.LocationTools;
039import org.biojava.bio.symbol.SymbolList;
040
041/**
042 * Builds a PHYLIP file by listening to events.
043 * 
044 * @author Richard Holland
045 * @author Tobias Thierer
046 * @author Jim Balhoff
047 * @since 1.6
048 */
049public class PHYLIPFileBuilder implements PHYLIPFileListener {
050  
051  private LinkedHashMap sequences;
052  private int sequenceCount;
053  private int sitesCount;
054  private String currentSequenceName;
055  private Alignment alignment;
056  
057  public void startFile() {
058    this.sequences = new LinkedHashMap();
059  }
060
061  public void endFile() throws ParseException {
062    this.verifySequenceAndSitesCount();
063    this.buildAlignment();
064  }
065
066  public void setSequenceCount(int count) {
067    this.sequenceCount = count;
068  }
069  
070  public void setSitesCount(int count) {
071    this.sitesCount = count;
072  }
073  
074  public void setCurrentSequenceName(String name) {
075    if (!(this.sequences.containsKey(name))) {
076      sequences.put(name, new StringBuffer());
077    }
078    this.currentSequenceName = name;
079  }
080  
081  public void receiveSequence(String sequence) {
082    StringBuffer buffer = (StringBuffer)(this.sequences.get(this.currentSequenceName));
083    //System.out.println(sequence);
084    buffer.append(sequence);
085  }
086  
087  public Alignment getAlignment() {
088    return this.alignment;
089  }
090  
091  private void buildAlignment() throws ParseException {
092    List importedSequences = null;
093    try {
094      importedSequences = this.createSequences();
095    } catch (IllegalSymbolException e) {
096      throw new ParseException("Illegal symbol in sequence: " + e);
097    } catch (BioException e) {
098      throw new ParseException("Could not create sequences: " + e);
099    }
100    FlexibleAlignment newAlignment;
101    try {
102      newAlignment = new FlexibleAlignment(importedSequences);
103    } catch (BioException e) {
104      throw new ParseException("Could not construct alignment object: " + e);
105    }
106    this.alignment = newAlignment;
107  }
108  
109  private List createSequences() throws IllegalSymbolException, BioException {
110    List importedSequences = new ArrayList();
111    boolean checkedType = false;
112    boolean isDNA = true;
113    Location loc = LocationTools.makeLocation(0, this.sitesCount - 1);
114    for (Iterator i = this.sequences.entrySet().iterator(); i.hasNext(); ) {
115      Entry sequenceEntry = (Entry)i.next();
116      String name = (String)sequenceEntry.getKey();
117      String sequence = ((StringBuffer)sequenceEntry.getValue()).toString();
118      SymbolList symbolList = null;
119      if (!checkedType) {
120        try {
121          DNATools.createGappedDNASequence(sequence, name);
122        } catch (IllegalSymbolException e) {
123          isDNA = false;
124        }
125        checkedType = true;
126      }
127      if (isDNA) {
128        // make DNA sequences
129        symbolList = DNATools.createGappedDNASequence(sequence, name);
130        
131      } else {
132        // make protein sequences
133        symbolList = ProteinTools.createGappedProteinSequence(sequence, name);
134      }
135      importedSequences.add(new SimpleAlignmentElement(name, symbolList, loc));
136    }
137    return importedSequences;
138  } 
139  
140  private void verifySequenceAndSitesCount() throws ParseException {
141    if (this.sequences.size() != this.sequenceCount) {
142      throw new ParseException("Number of sequences does not match header.");
143    } else {
144      for (Iterator i = this.sequences.values().iterator(); i.hasNext();) {
145        String currentSequence = ((StringBuffer)i.next()).toString();
146        if (currentSequence.length() != this.sitesCount) {
147          throw new ParseException("Number of sites does not match header.");
148        }
149      }
150    }
151  }
152 }