001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojavax.bio.phylo.io.phylip;
022
023import java.io.BufferedReader;
024import java.io.File;
025import java.io.FileReader;
026import java.io.FileWriter;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.io.OutputStream;
031import java.io.OutputStreamWriter;
032import java.io.Reader;
033import java.io.Writer;
034import java.util.ArrayList;
035import java.util.Iterator;
036import java.util.List;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039
040import org.biojava.bio.alignment.Alignment;
041import org.biojava.bio.seq.io.ParseException;
042
043/**
044 * Reads PHYLIP interleaved alignment files and fires events at a PHYLIPFileListener object.
045 * 
046 * @author Richard Holland
047 * @author Tobias Thierer
048 * @author Jim Balhoff
049 * @since 1.6
050 */
051public class PHYLIPFileFormat {
052  
053  private static int MAX_NAME_LENGTH = 10;
054  
055//Prevent instances.
056  private PHYLIPFileFormat() {
057  }
058  
059  public static void parseFile(final PHYLIPFileListener listener, final File inputFile) throws IOException, ParseException {
060    final FileReader fr = new FileReader(inputFile);
061    try {
062      PHYLIPFileFormat.parseReader(listener, fr);
063    } finally {
064      fr.close();
065    }
066  }
067  
068  public static void parseInputStream(final PHYLIPFileListener listener,
069      final InputStream inputStream) throws IOException, ParseException {
070    PHYLIPFileFormat.parseReader(listener,
071        new InputStreamReader(inputStream));
072  }
073  
074  public static void parseReader(final PHYLIPFileListener listener,
075      final Reader inputReader) throws IOException, ParseException {
076    PHYLIPFileFormat.parse(listener, inputReader instanceof BufferedReader ? (BufferedReader) inputReader
077            : new BufferedReader(inputReader));
078  }
079  
080  public static void parse(final PHYLIPFileListener listener, BufferedReader reader) throws IOException, ParseException {
081   listener.startFile();
082   List sequenceNames = new ArrayList();
083   String headerline = reader.readLine();
084   Pattern pattern = Pattern.compile("\\s*(\\d+)\\s+(\\d+)\\s*");
085   Matcher matcher = pattern.matcher(headerline);
086   if (!matcher.matches()) {
087     throw new ParseException("Invalid header line.");
088   }
089   int sequenceCount = Integer.parseInt(matcher.group(1));
090   listener.setSequenceCount(sequenceCount);
091   int sitesCount = Integer.parseInt(matcher.group(2));
092   listener.setSitesCount(sitesCount);
093   int currentSequenceIndex = 0;
094   boolean collectedAllNames = false;
095   String line = reader.readLine();
096   while (line != null) {
097     if (line.length() == 0) continue;
098     if (!collectedAllNames) {
099       String name = line.substring(0, MAX_NAME_LENGTH).trim();
100       sequenceNames.add(name);
101       line = line.substring(MAX_NAME_LENGTH).replaceAll("\\s", "");
102     }
103     listener.setCurrentSequenceName((String)sequenceNames.get(currentSequenceIndex));
104     listener.receiveSequence(line);
105     if (sequenceNames.size() == sequenceCount) collectedAllNames = true;
106     currentSequenceIndex++;
107     if (!(currentSequenceIndex < sequenceCount)) currentSequenceIndex = 0;
108     line = reader.readLine();
109   }
110   listener.endFile();
111  }
112  
113  /**
114   * Writes the given Alignment in PHYLIP format to a file.
115   * 
116   * @param file
117   *            the file to write to.
118   * @param alignment
119   *            the Alignment object to write.
120   * @throws IOException
121   *             if there is a problem during writing.
122   */
123  public static void writeFile(final File file, final Alignment alignment)
124      throws IOException {
125    final FileWriter fw = new FileWriter(file);
126    try {
127      PHYLIPFileFormat.writeWriter(fw, alignment);
128    } finally {
129      fw.close();
130    }
131  }
132
133  /**
134   * Writes the given Alignment in PHYLIP format to a stream.
135   * 
136   * @param os
137   *            the stream to write to.
138   * @param alignment
139   *            the Alignment object to write.
140   * @throws IOException
141   *             if there is a problem during writing.
142   */
143  public static void writeStream(final OutputStream os,
144      final Alignment alignment) throws IOException {
145    final OutputStreamWriter ow = new OutputStreamWriter(os);
146    PHYLIPFileFormat.writeWriter(ow, alignment);
147  }
148
149  /**
150   * Writes the given Alignment in PHYLIP format to a writer.
151   * 
152   * @param writer
153   *            the writer to write to.
154   * @param alignment
155   *            the Alignment object to write.
156   * @throws IOException
157   *             if there is a problem during writing.
158   */
159  public static void writeWriter(final Writer writer,
160      final Alignment alignment) throws IOException {
161    String lineSep = System.getProperty("line.separator");
162    writer.write("" + (alignment.getLabels().size()));
163    writer.write("   ");
164    writer.write("" + (alignment.length()) + lineSep);
165    for (Iterator i = alignment.getLabels().iterator(); i.hasNext();) {
166      String label = (String)i.next();
167      String sequence = alignment.symbolListForLabel(label).seqString();
168      writer.write(PHYLIPFileFormat.formatSequenceLabel(label));
169      writer.write(sequence);
170      writer.write(lineSep);
171    } 
172    writer.flush();
173  }
174  
175  private static String formatSequenceLabel(String label) {
176    if (label.length() > MAX_NAME_LENGTH) {
177      return label.substring(0, MAX_NAME_LENGTH);
178    } else if (label.length() < MAX_NAME_LENGTH) {
179      StringBuffer buffer = new StringBuffer(label);
180      while (buffer.length() < MAX_NAME_LENGTH) {
181        buffer.append(" ");
182      }
183      return buffer.toString();
184    }
185    else {
186      return label;
187    }
188  }
189}