/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojavax.bio.phylo.io.nexus;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Iterator;
import java.util.Stack;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import org.biojava.bio.seq.io.ParseException;

040/**
041 * Reads/writes Nexus files and fires events at a NexusFileListener object.
042 * Blocks are parsed using NexusBlockParser objects provided at runtime. Each of
043 * those objects should probably have a NexusBlockListener object associated
044 * with them that receives events generated from the processed data in the
045 * block.
046 * 
047 * @author Richard Holland
048 * @author Tobias Thierer
049 * @author Jim Balhoff
050 * @since 1.6
051 */
052public class NexusFileFormat {
053
054        /**
055         * New-line symbol.
056         */
057        public static final String NEW_LINE = System.getProperty("line.separator");
058
059        // Prevent instances.
060        private NexusFileFormat() {
061        }
062
063        /**
064         * Parse a file and send events to the given listener.
065         * 
066         * @param listener
067         *            the listener that will receive events.
068         * @param inputFile
069         *            the file to parse.
070         * @throws IOException
071         *             if anything goes wrong with reading the file.
072         * @throws ParseException
073         *             if the file format is incorrect.
074         */
075        public static void parseFile(final NexusFileListener listener,
076                        final File inputFile) throws IOException, ParseException {
077                final FileReader fr = new FileReader(inputFile);
078                try {
079                        NexusFileFormat.parseReader(listener, fr);
080                } finally {
081                        fr.close();
082                }
083        }
084
085        /**
086         * Parse a stream and send events to the given listener.
087         * 
088         * @param listener
089         *            the listener that will receive events.
090         * @param inputStream
091         *            the stream to parse.
092         * @throws IOException
093         *             if anything goes wrong with reading the stream.
094         * @throws ParseException
095         *             if the stream format is incorrect.
096         */
097        public static void parseInputStream(final NexusFileListener listener,
098                        final InputStream inputStream) throws IOException, ParseException {
099                NexusFileFormat.parseReader(listener,
100                                new InputStreamReader(inputStream));
101        }
102
103        /**
104         * Parse a reader and send events to the given listener.
105         * 
106         * @param listener
107         *            the listener that will receive events.
108         * @param inputReader
109         *            the file to parse.
110         * @throws IOException
111         *             if anything goes wrong with reading the reader.
112         * @throws ParseException
113         *             if the reader format is incorrect.
114         */
115        public static void parseReader(final NexusFileListener listener,
116                        final Reader inputReader) throws IOException, ParseException {
117                NexusFileFormat
118                                .parse(
119                                                listener,
120                                                inputReader instanceof BufferedReader ? (BufferedReader) inputReader
121                                                                : new BufferedReader(inputReader));
122        }
123
124        // Do the work!
125        private static void parse(final NexusFileListener listener,
126                        final BufferedReader reader) throws IOException, ParseException {
127                // What are our delims?
128                String space = " ";
129                String tab = "\t";
130                String beginComment = "[";
131                String endComment = "]";
132                String singleQuote = "'";
133                String underscore = "_";
134                String endTokenGroup = ";";
135                String openBracket = "(";
136                String closeBracket = ")";
137                String openBrace = "{";
138                String closeBrace = "}";
139                String newLine = "\n";
140                String allDelims = space + tab + beginComment + endComment
141                                + singleQuote + underscore + endTokenGroup + newLine
142                                + openBracket + closeBracket + openBrace + closeBrace;
143
144                // Reset status flags.
145                int inComment = 0;
146                boolean inSingleQuotes = false;
147                boolean inDoubleQuotes = false;
148                boolean singleQuoteOpened = false;
149                TokenParser parser = new TokenParser(listener);
150
151                // Read the file line-by-line.
152                final Stack parsedTokBufferStack = new Stack();
153                StringBuffer parsedTokBuffer = new StringBuffer();
154                String line;
155                while ((line = reader.readLine()) != null) {
156                        final StringTokenizer tokenizer = new StringTokenizer(
157                                        line.replaceAll("\\r\\n|\\r", "\n")
158                                        + "\n", allDelims, true);
159                        while (tokenizer.hasMoreTokens()) {
160                                final String tok = tokenizer.nextToken();
161
162                                // Process token.
163                                if (allDelims.indexOf(tok) >= 0) {
164                                        // Process double quotes by flipping inside quote
165                                        // status and appending the quote to the end of
166                                        // the current token buffer then skipping to the
167                                        // next parsed token.
168                                        if (singleQuoteOpened && singleQuote.equals(tok)) {
169                                                inSingleQuotes = !inSingleQuotes;
170                                                parsedTokBuffer.append(singleQuote);
171                                        }
172                                        // Stuff inside comments.
173                                        else if (inComment > 0) {
174                                                // Start or end quotes?
175                                                if (singleQuote.equals(tok))
176                                                        inSingleQuotes = !inSingleQuotes;
177                                                // Nested comment.
178                                                else if (beginComment.equals(tok) && !inSingleQuotes
179                                                                && !inDoubleQuotes) {
180                                                        // Flush any existing comment text.
181                                                        if (parsedTokBuffer.length() > 0) {
182                                                                listener
183                                                                                .commentText(parsedTokBuffer.toString());
184                                                                parsedTokBuffer.setLength(0);
185                                                        }
186                                                        // Start the new comment.
187                                                        inComment++;
188                                                        listener.beginComment();
189                                                        parsedTokBufferStack.push(parsedTokBuffer);
190                                                        parsedTokBuffer = new StringBuffer();
191                                                }
192                                                // Closing comment, not inside quotes. This
193                                                // fires the current token buffer contents
194                                                // as plain text at the listener, then clears
195                                                // the buffer.
196                                                else if (endComment.equals(tok) && !inSingleQuotes
197                                                                && !inDoubleQuotes) {
198                                                        inComment--;
199                                                        if (parsedTokBuffer.length() > 0)
200                                                                listener
201                                                                                .commentText(parsedTokBuffer.toString());
202                                                        listener.endComment();
203                                                        parsedTokBuffer = (StringBuffer) parsedTokBufferStack
204                                                                        .pop();
205                                                }
206                                                // All other tokens are appended to the comment
207                                                // buffer.
208                                                else
209                                                        parsedTokBuffer.append(tok);
210                                        }
211                                        // Delimiter inside quotes.
212                                        else if (inSingleQuotes) {
213                                                // Closing quote puts us outside quotes.
214                                                if (singleQuote.equals(tok))
215                                                        inSingleQuotes = false;
216                                                // All other delimiters copied verbatim.
217                                                else
218                                                        parsedTokBuffer.append(tok);
219                                        }
220                                        // Delimiter outside quote or comment.
221                                        else {
222                                                // Begin comment.
223                                                if (beginComment.equals(tok)) {
224                                                        // Start the new comment.
225                                                        inComment++;
226                                                        listener.beginComment();
227                                                        // Preserve any existing part-built tag.
228                                                        parsedTokBufferStack.push(parsedTokBuffer);
229                                                        parsedTokBuffer = new StringBuffer();
230                                                }
231                                                // Start quoted string.
232                                                else if (singleQuote.equals(tok))
233                                                        inSingleQuotes = true;
234                                                // Convert underscores to spaces.
235                                                else if (underscore.equals(tok))
236                                                        parsedTokBuffer.append(space);
237                                                // Brackets. Pass through as tokens if
238                                                // the client wishes, or just append to buffer
239                                                // if client doesn't care.
240                                                else if (openBracket.equals(tok)
241                                                                || closeBracket.equals(tok)
242                                                                || openBrace.equals(tok)
243                                                                || closeBrace.equals(tok)) {
244                                                        if (listener.wantsBracketsAndBraces()) {
245                                                                // Dump buffer so far.
246                                                                final String parsedTok = parsedTokBuffer
247                                                                                .toString();
248                                                                parsedTokBuffer.setLength(0);
249                                                                parser.parseToken(parsedTok);
250                                                                // Parse bracket/brace itself.
251                                                                listener.parseToken(tok);
252                                                        } else
253                                                                parsedTokBuffer.append(tok);
254                                                }
255                                                // Use whitespace/semi-colon to indicate end
256                                                // of current token.
257                                                else if (space.equals(tok) || tab.equals(tok)
258                                                                || endTokenGroup.equals(tok)
259                                                                || newLine.equals(tok)) {
260                                                        // Don't bother checking token buffer contents if
261                                                        // the buffer is empty.
262                                                        if (parsedTokBuffer.length() > 0) {
263                                                                final String parsedTok = parsedTokBuffer
264                                                                                .toString();
265                                                                parsedTokBuffer.setLength(0);
266                                                                parser.parseToken(parsedTok);
267                                                        }
268
269                                                        // If this was an end-line, let the listeners know.
270                                                        if (endTokenGroup.equals(tok))
271                                                                listener.endTokenGroup();
272                                                        // Otherwise pass all whitespace through as
273                                                        // additional tokens.
274                                                        else
275                                                                listener.parseToken(tok);
276                                                }
277                                        }
278                                }
279                                // Process all non-delimiter tokens.
280                                else
281                                        // Add token to buffer so far.
282                                        parsedTokBuffer.append(tok);
283
284                                // Update double quote status. The next token is a potential
285                                // double
286                                // quote if the previous token was NOT a quote but this one IS.
287                                singleQuoteOpened = !singleQuoteOpened
288                                                && singleQuote.equals(tok);
289                        }
290                }
291
292                // End the listener.
293                listener.endFile();
294        }
295
296        private static class TokenParser {
297
298                private boolean expectingHeader = true;
299
300                private boolean expectingBeginTag = false;
301
302                private boolean expectingBeginName = false;
303
304                private boolean expectingBlockContents = false;
305
306                private NexusFileListener listener;
307
308                private TokenParser(final NexusFileListener listener) {
309                        this.listener = listener;
310                }
311
312                private void parseToken(final String parsedTok) throws ParseException {
313
314                        // Expecting header?
315                        if (this.expectingHeader && "#NEXUS".equalsIgnoreCase(parsedTok)) {
316                                this.expectingHeader = false;
317                                this.expectingBeginTag = true;
318                                this.listener.startFile();
319                        }
320
321                        // Expecting a BEGIN tag?
322                        else if (this.expectingBeginTag
323                                        && "BEGIN".equalsIgnoreCase(parsedTok)) {
324                                this.expectingBeginTag = false;
325                                this.expectingBeginName = true;
326                        }
327
328                        // Expecting a name for a BEGIN block?
329                        else if (this.expectingBeginName) {
330                                this.listener.startBlock(parsedTok);
331                                this.expectingBeginName = false;
332                                this.expectingBlockContents = true;
333                        }
334
335                        // Looking for block contents?
336                        else if (this.expectingBlockContents) {
337                                // End tag?
338                                if ("END".equalsIgnoreCase(parsedTok)) {
339                                        this.listener.endBlock();
340                                        this.expectingBlockContents = false;
341                                        this.expectingBeginTag = true;
342                                }
343                                // Or just normal token.
344                                else
345                                        this.listener.parseToken(parsedTok);
346                        }
347
348                        // All other situations.
349                        else
350                                throw new ParseException(
351                                                "Parser in unknown state when parsing token \""
352                                                                + parsedTok + "\"");
353                }
354        }
355
356        /**
357         * Writes the given Nexus output to a file.
358         * 
359         * @param file
360         *            the file to write to.
361         * @param nexusFile
362         *            the Nexus output to write.
363         * @throws IOException
364         *             if there is a problem during writing.
365         */
366        public static void writeFile(final File file, final NexusFile nexusFile)
367                        throws IOException {
368                final FileWriter fw = new FileWriter(file);
369                try {
370                        NexusFileFormat.writeWriter(fw, nexusFile);
371                } finally {
372                        fw.close();
373                }
374        }
375
376        /**
377         * Writes the given Nexus output to a stream.
378         * 
379         * @param os
380         *            the stream to write to.
381         * @param nexusFile
382         *            the Nexus output to write.
383         * @throws IOException
384         *             if there is a problem during writing.
385         */
386        public static void writeStream(final OutputStream os,
387                        final NexusFile nexusFile) throws IOException {
388                final OutputStreamWriter ow = new OutputStreamWriter(os);
389                NexusFileFormat.writeWriter(ow, nexusFile);
390        }
391
392        /**
393         * Writes the given Nexus output to a writer.
394         * 
395         * @param writer
396         *            the writer to write to.
397         * @param nexusFile
398         *            the Nexus output to write.
399         * @throws IOException
400         *             if there is a problem during writing.
401         */
402        public static void writeWriter(final Writer writer,
403                        final NexusFile nexusFile) throws IOException {
404                writer.write("#NEXUS");
405                writer.write(NexusFileFormat.NEW_LINE);
406                for (final Iterator i = nexusFile.objectIterator(); i.hasNext();) {
407                        ((NexusObject) i.next()).writeObject(writer);
408                        writer.write(NexusFileFormat.NEW_LINE);
409                }
410                writer.flush();
411        }
412}