001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojavax.bio.phylo.io.phylip; 022 023import java.io.BufferedReader; 024import java.io.File; 025import java.io.FileReader; 026import java.io.FileWriter; 027import java.io.IOException; 028import java.io.InputStream; 029import java.io.InputStreamReader; 030import java.io.OutputStream; 031import java.io.OutputStreamWriter; 032import java.io.Reader; 033import java.io.Writer; 034import java.util.ArrayList; 035import java.util.Iterator; 036import java.util.List; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039 040import org.biojava.bio.alignment.Alignment; 041import org.biojava.bio.seq.io.ParseException; 042 043/** 044 * Reads PHYLIP interleaved alignment files and fires events at a PHYLIPFileListener object. 045 * 046 * @author Richard Holland 047 * @author Tobias Thierer 048 * @author Jim Balhoff 049 * @since 1.6 050 */ 051public class PHYLIPFileFormat { 052 053 private static int MAX_NAME_LENGTH = 10; 054 055//Prevent instances. 056 private PHYLIPFileFormat() { 057 } 058 059 public static void parseFile(final PHYLIPFileListener listener, final File inputFile) throws IOException, ParseException { 060 final FileReader fr = new FileReader(inputFile); 061 try { 062 PHYLIPFileFormat.parseReader(listener, fr); 063 } finally { 064 fr.close(); 065 } 066 } 067 068 public static void parseInputStream(final PHYLIPFileListener listener, 069 final InputStream inputStream) throws IOException, ParseException { 070 PHYLIPFileFormat.parseReader(listener, 071 new InputStreamReader(inputStream)); 072 } 073 074 public static void parseReader(final PHYLIPFileListener listener, 075 final Reader inputReader) throws IOException, ParseException { 076 PHYLIPFileFormat.parse(listener, inputReader instanceof BufferedReader ? (BufferedReader) inputReader 077 : new BufferedReader(inputReader)); 078 } 079 080 public static void parse(final PHYLIPFileListener listener, BufferedReader reader) throws IOException, ParseException { 081 listener.startFile(); 082 List sequenceNames = new ArrayList(); 083 String headerline = reader.readLine(); 084 Pattern pattern = Pattern.compile("\\s*(\\d+)\\s+(\\d+)\\s*"); 085 Matcher matcher = pattern.matcher(headerline); 086 if (!matcher.matches()) { 087 throw new ParseException("Invalid header line."); 088 } 089 int sequenceCount = Integer.parseInt(matcher.group(1)); 090 listener.setSequenceCount(sequenceCount); 091 int sitesCount = Integer.parseInt(matcher.group(2)); 092 listener.setSitesCount(sitesCount); 093 int currentSequenceIndex = 0; 094 boolean collectedAllNames = false; 095 String line = reader.readLine(); 096 while (line != null) { 097 if (line.length() == 0) continue; 098 if (!collectedAllNames) { 099 String name = line.substring(0, MAX_NAME_LENGTH).trim(); 100 sequenceNames.add(name); 101 line = line.substring(MAX_NAME_LENGTH).replaceAll("\\s", ""); 102 } 103 listener.setCurrentSequenceName((String)sequenceNames.get(currentSequenceIndex)); 104 listener.receiveSequence(line); 105 if (sequenceNames.size() == sequenceCount) collectedAllNames = true; 106 currentSequenceIndex++; 107 if (!(currentSequenceIndex < sequenceCount)) currentSequenceIndex = 0; 108 line = reader.readLine(); 109 } 110 listener.endFile(); 111 } 112 113 /** 114 * Writes the given Alignment in PHYLIP format to a file. 115 * 116 * @param file 117 * the file to write to. 118 * @param alignment 119 * the Alignment object to write. 120 * @throws IOException 121 * if there is a problem during writing. 122 */ 123 public static void writeFile(final File file, final Alignment alignment) 124 throws IOException { 125 final FileWriter fw = new FileWriter(file); 126 try { 127 PHYLIPFileFormat.writeWriter(fw, alignment); 128 } finally { 129 fw.close(); 130 } 131 } 132 133 /** 134 * Writes the given Alignment in PHYLIP format to a stream. 135 * 136 * @param os 137 * the stream to write to. 138 * @param alignment 139 * the Alignment object to write. 140 * @throws IOException 141 * if there is a problem during writing. 142 */ 143 public static void writeStream(final OutputStream os, 144 final Alignment alignment) throws IOException { 145 final OutputStreamWriter ow = new OutputStreamWriter(os); 146 PHYLIPFileFormat.writeWriter(ow, alignment); 147 } 148 149 /** 150 * Writes the given Alignment in PHYLIP format to a writer. 151 * 152 * @param writer 153 * the writer to write to. 154 * @param alignment 155 * the Alignment object to write. 156 * @throws IOException 157 * if there is a problem during writing. 158 */ 159 public static void writeWriter(final Writer writer, 160 final Alignment alignment) throws IOException { 161 String lineSep = System.getProperty("line.separator"); 162 writer.write("" + (alignment.getLabels().size())); 163 writer.write(" "); 164 writer.write("" + (alignment.length()) + lineSep); 165 for (Iterator i = alignment.getLabels().iterator(); i.hasNext();) { 166 String label = (String)i.next(); 167 String sequence = alignment.symbolListForLabel(label).seqString(); 168 writer.write(PHYLIPFileFormat.formatSequenceLabel(label)); 169 writer.write(sequence); 170 writer.write(lineSep); 171 } 172 writer.flush(); 173 } 174 175 private static String formatSequenceLabel(String label) { 176 if (label.length() > MAX_NAME_LENGTH) { 177 return label.substring(0, MAX_NAME_LENGTH); 178 } else if (label.length() < MAX_NAME_LENGTH) { 179 StringBuffer buffer = new StringBuffer(label); 180 while (buffer.length() < MAX_NAME_LENGTH) { 181 buffer.append(" "); 182 } 183 return buffer.toString(); 184 } 185 else { 186 return label; 187 } 188 } 189}