001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojavax.bio.phylo.io.nexus; 022 023import java.io.BufferedReader; 024import java.io.File; 025import java.io.FileReader; 026import java.io.FileWriter; 027import java.io.IOException; 028import java.io.InputStream; 029import java.io.InputStreamReader; 030import java.io.OutputStream; 031import java.io.OutputStreamWriter; 032import java.io.Reader; 033import java.io.Writer; 034import java.util.Iterator; 035import java.util.Stack; 036import java.util.StringTokenizer; 037 038import org.biojava.bio.seq.io.ParseException; 039 040/** 041 * Reads/writes Nexus files and fires events at a NexusFileListener object. 042 * Blocks are parsed using NexusBlockParser objects provided at runtime. Each of 043 * those objects should probably have a NexusBlockListener object associated 044 * with them that receives events generated from the processed data in the 045 * block. 046 * 047 * @author Richard Holland 048 * @author Tobias Thierer 049 * @author Jim Balhoff 050 * @since 1.6 051 */ 052public class NexusFileFormat { 053 054 /** 055 * New-line symbol. 056 */ 057 public static final String NEW_LINE = System.getProperty("line.separator"); 058 059 // Prevent instances. 060 private NexusFileFormat() { 061 } 062 063 /** 064 * Parse a file and send events to the given listener. 065 * 066 * @param listener 067 * the listener that will receive events. 068 * @param inputFile 069 * the file to parse. 070 * @throws IOException 071 * if anything goes wrong with reading the file. 072 * @throws ParseException 073 * if the file format is incorrect. 074 */ 075 public static void parseFile(final NexusFileListener listener, 076 final File inputFile) throws IOException, ParseException { 077 final FileReader fr = new FileReader(inputFile); 078 try { 079 NexusFileFormat.parseReader(listener, fr); 080 } finally { 081 fr.close(); 082 } 083 } 084 085 /** 086 * Parse a stream and send events to the given listener. 087 * 088 * @param listener 089 * the listener that will receive events. 090 * @param inputStream 091 * the stream to parse. 092 * @throws IOException 093 * if anything goes wrong with reading the stream. 094 * @throws ParseException 095 * if the stream format is incorrect. 096 */ 097 public static void parseInputStream(final NexusFileListener listener, 098 final InputStream inputStream) throws IOException, ParseException { 099 NexusFileFormat.parseReader(listener, 100 new InputStreamReader(inputStream)); 101 } 102 103 /** 104 * Parse a reader and send events to the given listener. 105 * 106 * @param listener 107 * the listener that will receive events. 108 * @param inputReader 109 * the file to parse. 110 * @throws IOException 111 * if anything goes wrong with reading the reader. 112 * @throws ParseException 113 * if the reader format is incorrect. 114 */ 115 public static void parseReader(final NexusFileListener listener, 116 final Reader inputReader) throws IOException, ParseException { 117 NexusFileFormat 118 .parse( 119 listener, 120 inputReader instanceof BufferedReader ? (BufferedReader) inputReader 121 : new BufferedReader(inputReader)); 122 } 123 124 // Do the work! 125 private static void parse(final NexusFileListener listener, 126 final BufferedReader reader) throws IOException, ParseException { 127 // What are our delims? 128 String space = " "; 129 String tab = "\t"; 130 String beginComment = "["; 131 String endComment = "]"; 132 String singleQuote = "'"; 133 String underscore = "_"; 134 String endTokenGroup = ";"; 135 String openBracket = "("; 136 String closeBracket = ")"; 137 String openBrace = "{"; 138 String closeBrace = "}"; 139 String newLine = "\n"; 140 String allDelims = space + tab + beginComment + endComment 141 + singleQuote + underscore + endTokenGroup + newLine 142 + openBracket + closeBracket + openBrace + closeBrace; 143 144 // Reset status flags. 145 int inComment = 0; 146 boolean inSingleQuotes = false; 147 boolean inDoubleQuotes = false; 148 boolean singleQuoteOpened = false; 149 TokenParser parser = new TokenParser(listener); 150 151 // Read the file line-by-line. 152 final Stack parsedTokBufferStack = new Stack(); 153 StringBuffer parsedTokBuffer = new StringBuffer(); 154 String line; 155 while ((line = reader.readLine()) != null) { 156 final StringTokenizer tokenizer = new StringTokenizer( 157 line.replaceAll("\\r\\n|\\r", "\n") 158 + "\n", allDelims, true); 159 while (tokenizer.hasMoreTokens()) { 160 final String tok = tokenizer.nextToken(); 161 162 // Process token. 163 if (allDelims.indexOf(tok) >= 0) { 164 // Process double quotes by flipping inside quote 165 // status and appending the quote to the end of 166 // the current token buffer then skipping to the 167 // next parsed token. 168 if (singleQuoteOpened && singleQuote.equals(tok)) { 169 inSingleQuotes = !inSingleQuotes; 170 parsedTokBuffer.append(singleQuote); 171 } 172 // Stuff inside comments. 173 else if (inComment > 0) { 174 // Start or end quotes? 175 if (singleQuote.equals(tok)) 176 inSingleQuotes = !inSingleQuotes; 177 // Nested comment. 178 else if (beginComment.equals(tok) && !inSingleQuotes 179 && !inDoubleQuotes) { 180 // Flush any existing comment text. 181 if (parsedTokBuffer.length() > 0) { 182 listener 183 .commentText(parsedTokBuffer.toString()); 184 parsedTokBuffer.setLength(0); 185 } 186 // Start the new comment. 187 inComment++; 188 listener.beginComment(); 189 parsedTokBufferStack.push(parsedTokBuffer); 190 parsedTokBuffer = new StringBuffer(); 191 } 192 // Closing comment, not inside quotes. This 193 // fires the current token buffer contents 194 // as plain text at the listener, then clears 195 // the buffer. 196 else if (endComment.equals(tok) && !inSingleQuotes 197 && !inDoubleQuotes) { 198 inComment--; 199 if (parsedTokBuffer.length() > 0) 200 listener 201 .commentText(parsedTokBuffer.toString()); 202 listener.endComment(); 203 parsedTokBuffer = (StringBuffer) parsedTokBufferStack 204 .pop(); 205 } 206 // All other tokens are appended to the comment 207 // buffer. 208 else 209 parsedTokBuffer.append(tok); 210 } 211 // Delimiter inside quotes. 212 else if (inSingleQuotes) { 213 // Closing quote puts us outside quotes. 214 if (singleQuote.equals(tok)) 215 inSingleQuotes = false; 216 // All other delimiters copied verbatim. 217 else 218 parsedTokBuffer.append(tok); 219 } 220 // Delimiter outside quote or comment. 221 else { 222 // Begin comment. 223 if (beginComment.equals(tok)) { 224 // Start the new comment. 225 inComment++; 226 listener.beginComment(); 227 // Preserve any existing part-built tag. 228 parsedTokBufferStack.push(parsedTokBuffer); 229 parsedTokBuffer = new StringBuffer(); 230 } 231 // Start quoted string. 232 else if (singleQuote.equals(tok)) 233 inSingleQuotes = true; 234 // Convert underscores to spaces. 235 else if (underscore.equals(tok)) 236 parsedTokBuffer.append(space); 237 // Brackets. Pass through as tokens if 238 // the client wishes, or just append to buffer 239 // if client doesn't care. 240 else if (openBracket.equals(tok) 241 || closeBracket.equals(tok) 242 || openBrace.equals(tok) 243 || closeBrace.equals(tok)) { 244 if (listener.wantsBracketsAndBraces()) { 245 // Dump buffer so far. 246 final String parsedTok = parsedTokBuffer 247 .toString(); 248 parsedTokBuffer.setLength(0); 249 parser.parseToken(parsedTok); 250 // Parse bracket/brace itself. 251 listener.parseToken(tok); 252 } else 253 parsedTokBuffer.append(tok); 254 } 255 // Use whitespace/semi-colon to indicate end 256 // of current token. 257 else if (space.equals(tok) || tab.equals(tok) 258 || endTokenGroup.equals(tok) 259 || newLine.equals(tok)) { 260 // Don't bother checking token buffer contents if 261 // the buffer is empty. 262 if (parsedTokBuffer.length() > 0) { 263 final String parsedTok = parsedTokBuffer 264 .toString(); 265 parsedTokBuffer.setLength(0); 266 parser.parseToken(parsedTok); 267 } 268 269 // If this was an end-line, let the listeners know. 270 if (endTokenGroup.equals(tok)) 271 listener.endTokenGroup(); 272 // Otherwise pass all whitespace through as 273 // additional tokens. 274 else 275 listener.parseToken(tok); 276 } 277 } 278 } 279 // Process all non-delimiter tokens. 280 else 281 // Add token to buffer so far. 282 parsedTokBuffer.append(tok); 283 284 // Update double quote status. The next token is a potential 285 // double 286 // quote if the previous token was NOT a quote but this one IS. 287 singleQuoteOpened = !singleQuoteOpened 288 && singleQuote.equals(tok); 289 } 290 } 291 292 // End the listener. 293 listener.endFile(); 294 } 295 296 private static class TokenParser { 297 298 private boolean expectingHeader = true; 299 300 private boolean expectingBeginTag = false; 301 302 private boolean expectingBeginName = false; 303 304 private boolean expectingBlockContents = false; 305 306 private NexusFileListener listener; 307 308 private TokenParser(final NexusFileListener listener) { 309 this.listener = listener; 310 } 311 312 private void parseToken(final String parsedTok) throws ParseException { 313 314 // Expecting header? 315 if (this.expectingHeader && "#NEXUS".equalsIgnoreCase(parsedTok)) { 316 this.expectingHeader = false; 317 this.expectingBeginTag = true; 318 this.listener.startFile(); 319 } 320 321 // Expecting a BEGIN tag? 322 else if (this.expectingBeginTag 323 && "BEGIN".equalsIgnoreCase(parsedTok)) { 324 this.expectingBeginTag = false; 325 this.expectingBeginName = true; 326 } 327 328 // Expecting a name for a BEGIN block? 329 else if (this.expectingBeginName) { 330 this.listener.startBlock(parsedTok); 331 this.expectingBeginName = false; 332 this.expectingBlockContents = true; 333 } 334 335 // Looking for block contents? 336 else if (this.expectingBlockContents) { 337 // End tag? 338 if ("END".equalsIgnoreCase(parsedTok)) { 339 this.listener.endBlock(); 340 this.expectingBlockContents = false; 341 this.expectingBeginTag = true; 342 } 343 // Or just normal token. 344 else 345 this.listener.parseToken(parsedTok); 346 } 347 348 // All other situations. 349 else 350 throw new ParseException( 351 "Parser in unknown state when parsing token \"" 352 + parsedTok + "\""); 353 } 354 } 355 356 /** 357 * Writes the given Nexus output to a file. 358 * 359 * @param file 360 * the file to write to. 361 * @param nexusFile 362 * the Nexus output to write. 363 * @throws IOException 364 * if there is a problem during writing. 365 */ 366 public static void writeFile(final File file, final NexusFile nexusFile) 367 throws IOException { 368 final FileWriter fw = new FileWriter(file); 369 try { 370 NexusFileFormat.writeWriter(fw, nexusFile); 371 } finally { 372 fw.close(); 373 } 374 } 375 376 /** 377 * Writes the given Nexus output to a stream. 378 * 379 * @param os 380 * the stream to write to. 381 * @param nexusFile 382 * the Nexus output to write. 383 * @throws IOException 384 * if there is a problem during writing. 385 */ 386 public static void writeStream(final OutputStream os, 387 final NexusFile nexusFile) throws IOException { 388 final OutputStreamWriter ow = new OutputStreamWriter(os); 389 NexusFileFormat.writeWriter(ow, nexusFile); 390 } 391 392 /** 393 * Writes the given Nexus output to a writer. 394 * 395 * @param writer 396 * the writer to write to. 397 * @param nexusFile 398 * the Nexus output to write. 399 * @throws IOException 400 * if there is a problem during writing. 401 */ 402 public static void writeWriter(final Writer writer, 403 final NexusFile nexusFile) throws IOException { 404 writer.write("#NEXUS"); 405 writer.write(NexusFileFormat.NEW_LINE); 406 for (final Iterator i = nexusFile.objectIterator(); i.hasNext();) { 407 ((NexusObject) i.next()).writeObject(writer); 408 writer.write(NexusFileFormat.NEW_LINE); 409 } 410 writer.flush(); 411 } 412}