001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojavax.bio.phylo.io.nexus;
022
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collection;
026import java.util.HashMap;
027import java.util.LinkedHashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import java.util.Stack;
032
033import org.biojava.bio.seq.io.ParseException;
034
035/**
036 * Parses Nexus characters blocks.
037 * 
038 * @author Richard Holland
039 * @author Tobias Thierer
040 * @author Jim Balhoff
041 * @since 1.6
042 */
043public class CharactersBlockParser extends NexusBlockParser.Abstract {
044
045        private boolean expectingDimension;
046
047        private boolean expectingNewTaxa;
048
049        private boolean expectingNTax;
050
051        private boolean expectingNTaxEquals;
052
053        private boolean expectingNTaxValue;
054
055        private boolean expectingNChar;
056
057        private boolean expectingNCharEquals;
058
059        private boolean expectingNCharValue;
060
061        private boolean expectingFormat;
062
063        private boolean expectingEliminate;
064
065        private boolean expectingTaxLabel;
066
067        private boolean expectingTaxLabelValue;
068
069        private boolean expectingCharStateLabel;
070
071        private boolean expectingCharLabel;
072
073        private boolean expectingStateLabel;
074
075        private boolean expectingMatrix;
076
077        private boolean expectingDataType;
078
079        private boolean expectingDataTypeEquals;
080
081        private boolean expectingDataTypeContent;
082
083        private boolean expectingRespectCase;
084
085        private boolean expectingMissing;
086
087        private boolean expectingMissingEquals;
088
089        private boolean expectingMissingContent;
090
091        private boolean expectingGap;
092
093        private boolean expectingGapEquals;
094
095        private boolean expectingGapContent;
096
097        private boolean expectingSymbols;
098
099        private boolean expectingSymbolsEquals;
100
101        private boolean expectingSymbolsContent;
102
103        private boolean expectingEquate;
104
105        private boolean expectingEquateEquals;
106
107        private boolean expectingEquateContent;
108
109        private boolean expectingMatchChar;
110
111        private boolean expectingMatchCharEquals;
112
113        private boolean expectingMatchCharContent;
114
115        private boolean expectingLabels;
116
117        private boolean expectingTranspose;
118
119        private boolean expectingInterleave;
120
121        private boolean expectingItems;
122
123        private boolean expectingItemsEquals;
124
125        private boolean expectingItemsContent;
126
127        private boolean itemsInBrackets;
128
129        private boolean expectingStatesFormat;
130
131        private boolean expectingStatesFormatEquals;
132
133        private boolean expectingStatesFormatContent;
134
135        private boolean expectingTokens;
136
137        private String specifiedDataType;
138
139        private boolean tokenizedMatrix;
140
141        private boolean expectingEliminateRange;
142
143        private boolean expectingCharStateLabelKey;
144
145        private boolean expectingCharStateLabelName;
146
147        private boolean expectingCharStateLabelSynonym;
148
149        private boolean expectingCharLabelValue;
150
151        private boolean expectingStateLabelKey;
152
153        private boolean expectingStateLabelContent;
154
155        private boolean expectingMatrixKey;
156
157        private boolean expectingMatrixContent;
158
159        private String currentCharStateLabelKey;
160
161        private String currentStateLabelKey;
162
163        private String currentMatrixKey;
164
165        private List currentMatrixBracket;
166
167        private Map matrixStack = new HashMap();
168
169        private int matrixFirstLineLength;
170
171        private String matrixFirstLineKey;
172
173        private int matrixPrependNulls;
174
175        private boolean seenSymbol;
176
177        /**
178         * Delegates to NexusBlockParser.Abstract.
179         * 
180         * @param blockListener
181         *            the listener to send parse events to.
182         */
183        public CharactersBlockParser(CharactersBlockListener blockListener) {
184                super(blockListener);
185        }
186
187        public void resetStatus() {
188                this.expectingDimension = true;
189                this.expectingNewTaxa = false;
190                this.expectingNTax = false;
191                this.expectingNTaxEquals = false;
192                this.expectingNTaxValue = false;
193                this.expectingNChar = false;
194                this.expectingNCharEquals = false;
195                this.expectingNCharValue = false;
196                this.expectingFormat = false;
197                this.expectingEliminate = false;
198                this.expectingTaxLabel = false;
199                this.expectingTaxLabelValue = false;
200                this.expectingCharStateLabel = false;
201                this.expectingCharLabel = false;
202                this.expectingStateLabel = false;
203                this.expectingMatrix = false;
204                this.tokenizedMatrix = false;
205                this.specifiedDataType = null;
206                this.expectingDataType = false;
207                this.expectingDataTypeEquals = false;
208                this.expectingDataTypeContent = false;
209                this.expectingRespectCase = false;
210                this.expectingMissing = false;
211                this.expectingMissingEquals = false;
212                this.expectingMissingContent = false;
213                this.expectingGap = false;
214                this.expectingGapEquals = false;
215                this.expectingGapContent = false;
216                this.expectingSymbols = false;
217                this.expectingSymbolsEquals = false;
218                this.expectingSymbolsContent = false;
219                this.expectingEquate = false;
220                this.expectingEquateEquals = false;
221                this.expectingEquateContent = false;
222                this.expectingMatchChar = false;
223                this.expectingMatchCharEquals = false;
224                this.expectingMatchCharContent = false;
225                this.expectingLabels = false;
226                this.expectingTranspose = false;
227                this.expectingInterleave = false;
228                this.expectingItems = false;
229                this.expectingItemsEquals = false;
230                this.expectingItemsContent = false;
231                this.itemsInBrackets = false;
232                this.expectingStatesFormat = false;
233                this.expectingStatesFormatEquals = false;
234                this.expectingStatesFormatContent = false;
235                this.expectingTokens = false;
236                this.expectingEliminateRange = false;
237                this.expectingCharStateLabelKey = false;
238                this.expectingCharStateLabelName = false;
239                this.expectingCharStateLabelSynonym = false;
240                this.expectingCharLabelValue = false;
241                this.expectingStateLabelKey = false;
242                this.expectingStateLabelContent = false;
243                this.expectingMatrixKey = false;
244                this.expectingMatrixContent = false;
245                this.currentCharStateLabelKey = null;
246                this.currentStateLabelKey = null;
247                this.currentMatrixKey = null;
248                this.currentMatrixBracket = null;
249                this.matrixStack.clear();
250                this.matrixFirstLineKey = null;
251                this.matrixFirstLineLength = 0;
252                this.matrixPrependNulls = 0;
253                this.seenSymbol = false;
254        }
255
256        public boolean wantsBracketsAndBraces() {
257                return this.expectingMatrixContent;
258        }
259
260        public void parseToken(String token) throws ParseException {
261                if (this.expectingMatrixContent && "\n".equals(token)) {
262                        // Special handling for new lines inside matrix data.
263                        if (this.currentMatrixBracket != null) {
264                                ((CharactersBlockListener) this.getBlockListener())
265                                                .appendMatrixData(this.currentMatrixKey,
266                                                                this.currentMatrixBracket);
267                                this.currentMatrixBracket = null;
268                        }
269                        this.expectingMatrixContent = false;
270                        this.expectingMatrixKey = true;
271                } else if (this.expectingMatrixKey && "\n".equals(token)) {
272                        if (this.matrixFirstLineKey != null)
273                                this.matrixPrependNulls = this.matrixFirstLineLength;
274                } else if (token.trim().length() == 0)
275                        return;
276                else if (this.expectingDimension
277                                && "DIMENSIONS".equalsIgnoreCase(token)) {
278                        this.expectingDimension = false;
279                        this.expectingNewTaxa = true;
280                        this.expectingNTax = true;
281                        this.expectingNChar = true;
282                } else if (this.expectingNewTaxa && "NEWTAXA".equalsIgnoreCase(token)) {
283                        this.expectingNewTaxa = false;
284                        this.expectingNTax = true;
285                        this.expectingNChar = false;
286                } else if (this.expectingNTax && token.toUpperCase().startsWith("NTAX")) {
287                        this.expectingNewTaxa = false;
288                        this.expectingNTax = false;
289                        if (token.indexOf('=') >= 0) {
290                                final String[] parts = token.split("=");
291                                if (parts.length > 1) {
292                                        this.expectingNChar = true;
293                                        try {
294                                                ((CharactersBlockListener) this.getBlockListener())
295                                                                .setDimensionsNTax(Integer.parseInt(parts[1]));
296                                        } catch (NumberFormatException e) {
297                                                throw new ParseException("Invalid NTAX value: "
298                                                                + parts[1]);
299                                        }
300                                } else
301                                        this.expectingNTaxValue = true;
302                        } else
303                                this.expectingNTaxEquals = true;
304                } else if (this.expectingNTaxEquals && token.startsWith("=")) {
305                        this.expectingNTaxEquals = false;
306                        final String[] parts = token.split("=");
307                        if (parts.length > 1) {
308                                this.expectingNChar = true;
309                                try {
310                                        ((CharactersBlockListener) this.getBlockListener())
311                                                        .setDimensionsNTax(Integer.parseInt(parts[1]));
312                                } catch (NumberFormatException e) {
313                                        throw new ParseException("Invalid NTAX value: " + parts[1]);
314                                }
315                        } else
316                                this.expectingNTaxValue = true;
317                } else if (this.expectingNTaxValue) {
318                        this.expectingNTaxValue = false;
319                        try {
320                                ((CharactersBlockListener) this.getBlockListener())
321                                                .setDimensionsNTax(Integer.parseInt(token));
322                        } catch (NumberFormatException e) {
323                                throw new ParseException("Invalid NTAX value: " + token);
324                        }
325                        this.expectingNChar = true;
326                } else if (this.expectingNChar
327                                && token.toUpperCase().startsWith("NCHAR")) {
328                        this.expectingNChar = false;
329                        if (token.indexOf('=') >= 0) {
330                                final String[] parts = token.split("=");
331                                if (parts.length > 1) {
332                                        this.expectingFormat = true;
333                                        this.expectingEliminate = true;
334                                        this.expectingTaxLabel = true;
335                                        this.expectingCharStateLabel = true;
336                                        this.expectingCharLabel = true;
337                                        this.expectingStateLabel = true;
338                                        this.expectingMatrix = true;
339                                        try {
340                                                ((CharactersBlockListener) this.getBlockListener())
341                                                                .setDimensionsNChar(Integer.parseInt(parts[1]));
342                                        } catch (NumberFormatException e) {
343                                                throw new ParseException("Invalid NCHAR value: "
344                                                                + parts[1]);
345                                        }
346                                } else
347                                        this.expectingNCharValue = true;
348                        } else
349                                this.expectingNCharEquals = true;
350                } else if (this.expectingNCharEquals && token.startsWith("=")) {
351                        this.expectingNCharEquals = false;
352                        final String[] parts = token.split("=");
353                        if (parts.length > 1) {
354                                this.expectingFormat = true;
355                                this.expectingEliminate = true;
356                                this.expectingTaxLabel = true;
357                                this.expectingCharStateLabel = true;
358                                this.expectingCharLabel = true;
359                                this.expectingStateLabel = true;
360                                this.expectingMatrix = true;
361                                try {
362                                        ((CharactersBlockListener) this.getBlockListener())
363                                                        .setDimensionsNChar(Integer.parseInt(parts[1]));
364                                } catch (NumberFormatException e) {
365                                        throw new ParseException("Invalid NCHAR value: " + parts[1]);
366                                }
367                        } else
368                                this.expectingNCharValue = true;
369                } else if (this.expectingNCharValue) {
370                        this.expectingNCharValue = false;
371                        try {
372                                ((CharactersBlockListener) this.getBlockListener())
373                                                .setDimensionsNChar(Integer.parseInt(token));
374                        } catch (NumberFormatException e) {
375                                throw new ParseException("Invalid NCHAR value: " + token);
376                        }
377                        this.expectingFormat = true;
378                        this.expectingEliminate = true;
379                        this.expectingTaxLabel = true;
380                        this.expectingCharStateLabel = true;
381                        this.expectingCharLabel = true;
382                        this.expectingStateLabel = true;
383                        this.expectingMatrix = true;
384                }
385
386                else if (this.expectingFormat && "FORMAT".equalsIgnoreCase(token)) {
387                        this.expectingFormat = false;
388                        this.expectingDataType = true;
389                        this.expectingRespectCase = true;
390                        this.expectingMissing = true;
391                        this.expectingGap = true;
392                        this.expectingSymbols = true;
393                        this.expectingEquate = true;
394                        this.expectingMatchChar = true;
395                        this.expectingLabels = true;
396                        this.expectingTranspose = true;
397                        this.expectingInterleave = true;
398                        this.expectingItems = true;
399                        this.expectingStatesFormat = true;
400                        this.expectingTokens = true;
401                }
402
403                else if (this.expectingDataType
404                                && token.toUpperCase().startsWith("DATATYPE")) {
405                        this.expectingDataType = false;
406
407                        if (token.indexOf("=") >= 0) {
408                                final String[] parts = token.split("=");
409                                if (parts.length > 1) {
410                                        this.specifiedDataType = parts[1];
411                                        ((CharactersBlockListener) this.getBlockListener())
412                                                        .setDataType(parts[1]);
413                                } else
414                                        this.expectingDataTypeContent = true;
415                        } else
416                                this.expectingDataTypeEquals = true;
417                }
418
419                else if (this.expectingDataTypeEquals && token.startsWith("=")) {
420                        this.expectingDataTypeEquals = false;
421                        if (token.length() > 1) {
422                                token = token.substring(1);
423                                this.specifiedDataType = token;
424                                ((CharactersBlockListener) this.getBlockListener())
425                                                .setDataType(token);
426                        } else
427                                this.expectingDataTypeContent = true;
428                }
429
430                else if (this.expectingDataTypeContent) {
431                        this.specifiedDataType = token;
432                        ((CharactersBlockListener) this.getBlockListener())
433                                        .setDataType(token);
434                        this.expectingDataTypeContent = false;
435                }
436
437                else if (this.expectingRespectCase
438                                && "RESPECTCASE".equalsIgnoreCase(token)) {
439                        ((CharactersBlockListener) this.getBlockListener())
440                                        .setRespectCase(true);
441                        this.expectingRespectCase = false;
442                }
443
444                else if (this.expectingMissing
445                                && token.toUpperCase().startsWith("MISSING")) {
446                        this.expectingMissing = false;
447
448                        if (token.indexOf("=") >= 0) {
449                                final String[] parts = token.split("=");
450                                if (parts.length > 1)
451                                        ((CharactersBlockListener) this.getBlockListener())
452                                                        .setMissing(parts[1]);
453                                else
454                                        this.expectingMissingContent = true;
455                        } else
456                                this.expectingMissingEquals = true;
457                }
458
459                else if (this.expectingMissingEquals && token.startsWith("=")) {
460                        this.expectingMissingEquals = false;
461                        if (token.length() > 1)
462                                ((CharactersBlockListener) this.getBlockListener())
463                                                .setMissing(token.substring(1));
464                        else
465                                this.expectingMissingContent = true;
466                }
467
468                else if (this.expectingMissingContent) {
469                        ((CharactersBlockListener) this.getBlockListener())
470                                        .setMissing(token);
471                        this.expectingMissingContent = false;
472                }
473
474                else if (this.expectingGap && token.toUpperCase().startsWith("GAP")) {
475                        this.expectingGap = false;
476
477                        if (token.indexOf("=") >= 0) {
478                                final String[] parts = token.split("=");
479                                if (parts.length > 1)
480                                        ((CharactersBlockListener) this.getBlockListener())
481                                                        .setGap(parts[1]);
482                                else
483                                        this.expectingGapContent = true;
484                        } else
485                                this.expectingGapEquals = true;
486                }
487
488                else if (this.expectingGapEquals && token.startsWith("=")) {
489                        this.expectingGapEquals = false;
490                        if (token.length() > 1)
491                                ((CharactersBlockListener) this.getBlockListener())
492                                                .setGap(token.substring(1));
493                        else
494                                this.expectingGapContent = true;
495                }
496
497                else if (this.expectingGapContent) {
498                        ((CharactersBlockListener) this.getBlockListener()).setGap(token);
499                        this.expectingGapContent = false;
500                }
501
502                else if (this.expectingSymbols
503                                && token.toUpperCase().startsWith("SYMBOLS")) {
504                        this.expectingSymbols = false;
505
506                        if (token.indexOf("=") >= 0) {
507                                final String[] parts = token.split("=");
508                                if (parts.length > 1) {
509                                        if (!parts[1].startsWith("\""))
510                                                throw new ParseException(
511                                                                "Symbols string must start with '\"'");
512                                        parts[1] = parts[1].substring(1);
513                                        this.expectingSymbolsContent = true;
514                                        if (parts[1].endsWith("\"")) {
515                                                parts[1] = parts[1].substring(0, parts[1].length() - 1);
516                                                this.expectingSymbolsContent = false;
517                                        }
518                                        for (int i = 0; i < parts[1].length(); i++)
519                                                ((CharactersBlockListener) this.getBlockListener())
520                                                                .addSymbol("" + parts[1].charAt(i));
521                                } else
522                                        this.expectingSymbolsContent = true;
523                        } else
524                                this.expectingSymbolsEquals = true;
525                }
526
527                else if (this.expectingSymbolsEquals && token.startsWith("=")) {
528                        this.expectingSymbolsEquals = false;
529                        if (token.length() > 1) {
530                                token = token.substring(1);
531                                if (!token.startsWith("\""))
532                                        throw new ParseException(
533                                                        "Symbols string must start with '\"'");
534                                token = token.substring(1);
535                                this.expectingSymbolsContent = true;
536
537                                if (token.endsWith("\"")) {
538                                        token = token.substring(0, token.length() - 1);
539                                        this.expectingSymbolsContent = false;
540                                }
541                                for (int i = 0; i < token.length(); i++)
542                                        ((CharactersBlockListener) this.getBlockListener())
543                                                        .addSymbol("" + token.charAt(i));
544                        } else
545                                this.expectingSymbolsContent = true;
546                }
547
548                else if (this.expectingSymbolsContent) {
549                        if (token.startsWith("\""))
550                                token = token.substring(1);
551                        if (token.endsWith("\"")) {
552                                token = token.substring(0, token.length() - 1);
553                                this.expectingSymbolsContent = false;
554                        }
555                        if (token.equals(""))
556                                this.expectingSymbolsContent = !this.seenSymbol;
557                        else {
558                                for (int i = 0; i < token.length(); i++)
559                                        ((CharactersBlockListener) this.getBlockListener())
560                                                        .addSymbol("" + token.charAt(i));
561                                this.seenSymbol = true;
562                        }
563                }
564
565                else if (this.expectingEquate
566                                && token.toUpperCase().startsWith("EQUATE")) {
567                        this.expectingEquate = false;
568
569                        if (token.indexOf("=") >= 0) {
570                                final String[] parts = token.split("=");
571                                if (parts.length > 1) {
572                                        if (!parts[1].startsWith("\""))
573                                                throw new ParseException(
574                                                                "Symbols string must start with '\"'");
575                                        parts[1] = parts[1].substring(1);
576                                        this.expectingEquateContent = true;
577                                        if (parts[1].endsWith("\"")) {
578                                                parts[1] = parts[1].substring(0, parts[1].length() - 1);
579                                                this.expectingEquateContent = false;
580                                        }
581                                        final String[] subParts = parts[1].split("=");
582                                        final String symbol = subParts[0];
583                                        final StringBuffer text = new StringBuffer();
584                                        for (int i = 1; i < subParts.length; i++) {
585                                                if (i >= 2)
586                                                        text.append('=');
587                                                text.append(subParts[i]);
588                                        }
589                                        final List symbols = new ArrayList();
590                                        if (text.charAt(0) == '(')
591                                                symbols.addAll(Arrays.asList(text.substring(1,
592                                                                text.length() - 2).split("")));
593                                        else
594                                                symbols
595                                                                .addAll(Arrays
596                                                                                .asList(text.toString().split("")));
597                                        ((CharactersBlockListener) this.getBlockListener())
598                                                        .addEquate(symbol, symbols);
599                                } else
600                                        this.expectingEquateContent = true;
601                        } else
602                                this.expectingEquateEquals = true;
603                }
604
605                else if (this.expectingEquateEquals && token.startsWith("=")) {
606                        this.expectingEquateEquals = false;
607                        if (token.length() > 1) {
608                                token = token.substring(1);
609                                if (!token.startsWith("\""))
610                                        throw new ParseException(
611                                                        "Symbols string must start with '\"'");
612                                token = token.substring(1);
613                                this.expectingEquateContent = true;
614
615                                if (token.endsWith("\"")) {
616                                        token = token.substring(0, token.length() - 1);
617                                        this.expectingEquateContent = false;
618                                }
619                                final String[] subParts = token.split("=");
620                                final String symbol = subParts[0];
621                                final StringBuffer text = new StringBuffer();
622                                for (int i = 1; i < subParts.length; i++) {
623                                        if (i >= 2)
624                                                text.append('=');
625                                        text.append(subParts[i]);
626                                }
627                                final List symbols = new ArrayList();
628                                if (text.charAt(0) == '(')
629                                        symbols.addAll(Arrays.asList(text.substring(1,
630                                                        text.length() - 2).split("")));
631                                else
632                                        symbols.addAll(Arrays.asList(text.toString().split("")));
633                                ((CharactersBlockListener) this.getBlockListener()).addEquate(
634                                                symbol, symbols);
635                        } else
636                                this.expectingEquateContent = true;
637                }
638
639                else if (this.expectingEquateContent) {
640                        if (token.startsWith("\""))
641                                token = token.substring(1);
642                        if (token.endsWith("\"")) {
643                                token = token.substring(0, token.length() - 1);
644                                this.expectingEquateContent = false;
645                        }
646                        final String[] subParts = token.split("=");
647                        final String symbol = subParts[0];
648                        final StringBuffer text = new StringBuffer();
649                        for (int i = 1; i < subParts.length; i++) {
650                                if (i >= 2)
651                                        text.append('=');
652                                text.append(subParts[i]);
653                        }
654                        final List symbols = new ArrayList();
655                        if (text.charAt(0) == '(')
656                                symbols.addAll(Arrays.asList(text.substring(1,
657                                                text.length() - 2).split("")));
658                        else
659                                symbols.addAll(Arrays.asList(text.toString().split("")));
660                        ((CharactersBlockListener) this.getBlockListener()).addEquate(
661                                        symbol, symbols);
662                }
663
664                else if (this.expectingMatchChar
665                                && token.toUpperCase().startsWith("MATCHCHAR")) {
666                        this.expectingMatchChar = false;
667
668                        if (token.indexOf("=") >= 0) {
669                                final String[] parts = token.split("=");
670                                if (parts.length > 1)
671                                        ((CharactersBlockListener) this.getBlockListener())
672                                                        .setMatchChar(parts[1]);
673                                else
674                                        this.expectingMatchCharContent = true;
675                        } else
676                                this.expectingMatchCharEquals = true;
677                }
678
679                else if (this.expectingMatchCharEquals && token.startsWith("=")) {
680                        this.expectingMatchCharEquals = false;
681                        if (token.length() > 1)
682                                ((CharactersBlockListener) this.getBlockListener())
683                                                .setMatchChar(token.substring(1));
684                        else
685                                this.expectingMatchCharContent = true;
686                }
687
688                else if (this.expectingMatchCharContent) {
689                        ((CharactersBlockListener) this.getBlockListener())
690                                        .setMatchChar(token);
691                        this.expectingMatchCharContent = false;
692                }
693
694                else if (this.expectingLabels && "LABELS".equalsIgnoreCase(token)) {
695                        ((CharactersBlockListener) this.getBlockListener()).setLabels(true);
696                        this.expectingLabels = false;
697                }
698
699                else if (this.expectingLabels && "NOLABELS".equalsIgnoreCase(token)) {
700                        ((CharactersBlockListener) this.getBlockListener())
701                                        .setLabels(false);
702                        this.expectingLabels = false;
703                }
704
705                else if (this.expectingTranspose && "TRANSPOSE".equalsIgnoreCase(token)) {
706                        ((CharactersBlockListener) this.getBlockListener())
707                                        .setTransposed(true);
708                        this.expectingTranspose = false;
709                }
710
711                else if (this.expectingInterleave
712                                && token.toUpperCase().startsWith("INTERLEAVE")) {
713                        boolean interleaved = true;
714                        if (token.indexOf("=") >= 0) {
715                                final String[] parts = token.split("=");
716                                if (parts.length > 1) {
717                                        if (!("YES".equalsIgnoreCase(parts[1]) || "TRUE".equalsIgnoreCase(parts[1]))) {
718                                                interleaved = false;
719                                        }
720                                }
721                        }
722                        ((CharactersBlockListener) this.getBlockListener())
723                                        .setInterleaved(interleaved);
724                        this.expectingInterleave = false;
725                }
726
727                else if (this.expectingItems && token.toUpperCase().startsWith("ITEMS")) {
728                        this.expectingItems = false;
729
730                        if (token.indexOf("=") >= 0) {
731                                final String[] parts = token.split("=");
732                                if (parts.length > 1) {
733                                        if (parts[1].startsWith("(")) {
734                                                parts[1] = parts[1].substring(1);
735                                                this.itemsInBrackets = true;
736                                                this.expectingItemsContent = true;
737                                        }
738                                        if (parts[1].endsWith(")")) {
739                                                parts[1] = parts[1].substring(0, parts[1].length() - 1);
740                                                this.itemsInBrackets = false;
741                                                this.expectingItemsContent = false;
742                                        }
743                                        ((CharactersBlockListener) this.getBlockListener())
744                                                        .setStatesFormat(parts[1]);
745                                } else
746                                        this.expectingItemsContent = true;
747                        } else
748                                this.expectingItemsEquals = true;
749                }
750
751                else if (this.expectingItemsEquals && token.startsWith("=")) {
752                        this.expectingItemsEquals = false;
753                        if (token.length() > 1) {
754                                token = token.substring(1);
755                                if (token.startsWith("(")) {
756                                        token = token.substring(1);
757                                        this.itemsInBrackets = true;
758                                        this.expectingItemsContent = true;
759                                }
760                                if (token.endsWith(")")) {
761                                        token = token.substring(0, token.length() - 1);
762                                        this.itemsInBrackets = false;
763                                        this.expectingItemsContent = false;
764                                }
765                                ((CharactersBlockListener) this.getBlockListener())
766                                                .setStatesFormat(token);
767                        } else
768                                this.expectingItemsContent = true;
769                }
770
771                else if (this.expectingItemsContent) {
772                        if (token.startsWith("(")) {
773                                token = token.substring(1);
774                                this.itemsInBrackets = true;
775                                this.expectingItemsContent = true;
776                        }
777                        if (token.endsWith(")")) {
778                                token = token.substring(0, token.length() - 1);
779                                this.itemsInBrackets = false;
780                                this.expectingItemsContent = false;
781                        }
782                        ((CharactersBlockListener) this.getBlockListener())
783                                        .setStatesFormat(token);
784                        this.expectingItemsContent = this.itemsInBrackets;
785                }
786
787                else if (this.expectingStatesFormat
788                                && token.toUpperCase().startsWith("STATESFORMAT")) {
789                        this.expectingStatesFormat = false;
790
791                        if (token.indexOf("=") >= 0) {
792                                final String[] parts = token.split("=");
793                                if (parts.length > 1)
794                                        ((CharactersBlockListener) this.getBlockListener())
795                                                        .setStatesFormat(parts[1]);
796                                else
797                                        this.expectingStatesFormatContent = true;
798                        } else
799                                this.expectingStatesFormatEquals = true;
800                }
801
802                else if (this.expectingStatesFormatEquals && token.startsWith("=")) {
803                        this.expectingStatesFormatEquals = false;
804                        if (token.length() > 1)
805                                ((CharactersBlockListener) this.getBlockListener())
806                                                .setStatesFormat(token.substring(1));
807                        else
808                                this.expectingStatesFormatContent = true;
809                }
810
811                else if (this.expectingStatesFormatContent) {
812                        ((CharactersBlockListener) this.getBlockListener())
813                                        .setStatesFormat(token);
814                        this.expectingStatesFormatContent = false;
815                }
816
817                else if (this.expectingTokens && "TOKENS".equalsIgnoreCase(token)) {
818                        ((CharactersBlockListener) this.getBlockListener()).setTokens(true);
819                        this.expectingTokens = false;
820                        this.tokenizedMatrix = true;
821                }
822
823                else if (this.expectingTokens && "NOTOKENS".equalsIgnoreCase(token)) {
824                        ((CharactersBlockListener) this.getBlockListener())
825                                        .setTokens(false);
826                        this.expectingTokens = false;
827                        this.tokenizedMatrix = false;
828                }
829
830                else if (this.expectingEliminate && "ELIMINATE".equalsIgnoreCase(token)) {
831                        this.expectingFormat = false;
832                        this.expectingDataType = false;
833                        this.expectingRespectCase = false;
834                        this.expectingMissing = false;
835                        this.expectingGap = false;
836                        this.expectingSymbols = false;
837                        this.expectingEquate = false;
838                        this.expectingMatchChar = false;
839                        this.expectingLabels = false;
840                        this.expectingTranspose = false;
841                        this.expectingInterleave = false;
842                        this.expectingItems = false;
843                        this.expectingStatesFormat = false;
844                        this.expectingTokens = false;
845                        this.expectingEliminate = false;
846                        this.expectingEliminateRange = true;
847                }
848
849                else if (this.expectingEliminateRange) {
850                        final String parts[] = token.split("-");
851                        if (parts.length != 2)
852                                throw new ParseException("Eliminate range " + token
853                                                + " not in form X-Y");
854                        try {
855                                final int eliminateStart = Integer.parseInt(parts[0]);
856                                final int eliminateEnd = Integer.parseInt(parts[1]);
857                                ((CharactersBlockListener) this.getBlockListener())
858                                                .setEliminateStart(eliminateStart);
859                                ((CharactersBlockListener) this.getBlockListener())
860                                                .setEliminateEnd(eliminateEnd);
861                        } catch (NumberFormatException e) {
862                                throw new ParseException("Values in eliminate range " + token
863                                                + " not parseable integers");
864                        }
865                        this.expectingEliminateRange = false;
866                }
867
868                else if (this.expectingTaxLabel && "TAXLABELS".equalsIgnoreCase(token)) {
869                        this.expectingFormat = false;
870                        this.expectingDataType = false;
871                        this.expectingRespectCase = false;
872                        this.expectingMissing = false;
873                        this.expectingGap = false;
874                        this.expectingSymbols = false;
875                        this.expectingEquate = false;
876                        this.expectingMatchChar = false;
877                        this.expectingLabels = false;
878                        this.expectingTranspose = false;
879                        this.expectingInterleave = false;
880                        this.expectingItems = false;
881                        this.expectingStatesFormat = false;
882                        this.expectingTokens = false;
883                        this.expectingEliminate = false;
884                        this.expectingEliminateRange = false;
885                        this.expectingTaxLabel = false;
886                        this.expectingTaxLabelValue = true;
887                }
888
889                else if (this.expectingCharStateLabel
890                                && "CHARSTATELABELS".equalsIgnoreCase(token)) {
891                        this.expectingFormat = false;
892                        this.expectingDataType = false;
893                        this.expectingRespectCase = false;
894                        this.expectingMissing = false;
895                        this.expectingGap = false;
896                        this.expectingSymbols = false;
897                        this.expectingEquate = false;
898                        this.expectingMatchChar = false;
899                        this.expectingLabels = false;
900                        this.expectingTranspose = false;
901                        this.expectingInterleave = false;
902                        this.expectingItems = false;
903                        this.expectingStatesFormat = false;
904                        this.expectingTokens = false;
905                        this.expectingEliminate = false;
906                        this.expectingEliminateRange = false;
907                        this.expectingTaxLabel = false;
908                        this.expectingTaxLabelValue = false;
909                        this.expectingCharStateLabel = false;
910                        this.expectingCharStateLabelKey = true;
911                }
912
913                else if (this.expectingCharLabel
914                                && "CHARLABELS".equalsIgnoreCase(token)) {
915                        this.expectingFormat = false;
916                        this.expectingDataType = false;
917                        this.expectingRespectCase = false;
918                        this.expectingMissing = false;
919                        this.expectingGap = false;
920                        this.expectingSymbols = false;
921                        this.expectingEquate = false;
922                        this.expectingMatchChar = false;
923                        this.expectingLabels = false;
924                        this.expectingTranspose = false;
925                        this.expectingInterleave = false;
926                        this.expectingItems = false;
927                        this.expectingStatesFormat = false;
928                        this.expectingTokens = false;
929                        this.expectingEliminate = false;
930                        this.expectingEliminateRange = false;
931                        this.expectingTaxLabel = false;
932                        this.expectingTaxLabelValue = false;
933                        this.expectingCharStateLabel = false;
934                        this.expectingCharStateLabelKey = false;
935                        this.expectingCharStateLabelName = false;
936                        this.expectingCharStateLabelSynonym = false;
937                        this.expectingCharLabel = false;
938                        this.expectingCharLabelValue = true;
939                }
940
941                else if (this.expectingStateLabel
942                                && "STATELABELS".equalsIgnoreCase(token)) {
943                        this.expectingFormat = false;
944                        this.expectingDataType = false;
945                        this.expectingRespectCase = false;
946                        this.expectingMissing = false;
947                        this.expectingGap = false;
948                        this.expectingSymbols = false;
949                        this.expectingEquate = false;
950                        this.expectingMatchChar = false;
951                        this.expectingLabels = false;
952                        this.expectingTranspose = false;
953                        this.expectingInterleave = false;
954                        this.expectingItems = false;
955                        this.expectingStatesFormat = false;
956                        this.expectingTokens = false;
957                        this.expectingEliminate = false;
958                        this.expectingEliminateRange = false;
959                        this.expectingTaxLabel = false;
960                        this.expectingTaxLabelValue = false;
961                        this.expectingCharStateLabel = false;
962                        this.expectingCharStateLabelKey = false;
963                        this.expectingCharStateLabelName = false;
964                        this.expectingCharStateLabelSynonym = false;
965                        this.expectingCharLabel = false;
966                        this.expectingCharLabelValue = false;
967                        this.expectingStateLabel = false;
968                        this.expectingStateLabelKey = true;
969                }
970
971                else if (this.expectingMatrix && "MATRIX".equalsIgnoreCase(token)) {
972                        this.expectingFormat = false;
973                        this.expectingDataType = false;
974                        this.expectingRespectCase = false;
975                        this.expectingMissing = false;
976                        this.expectingGap = false;
977                        this.expectingSymbols = false;
978                        this.expectingEquate = false;
979                        this.expectingMatchChar = false;
980                        this.expectingLabels = false;
981                        this.expectingTranspose = false;
982                        this.expectingInterleave = false;
983                        this.expectingItems = false;
984                        this.expectingStatesFormat = false;
985                        this.expectingTokens = false;
986                        this.expectingEliminate = false;
987                        this.expectingEliminateRange = false;
988                        this.expectingTaxLabel = false;
989                        this.expectingTaxLabelValue = false;
990                        this.expectingCharStateLabel = false;
991                        this.expectingCharStateLabelKey = false;
992                        this.expectingCharStateLabelName = false;
993                        this.expectingCharStateLabelSynonym = false;
994                        this.expectingCharLabel = false;
995                        this.expectingCharLabelValue = false;
996                        this.expectingStateLabel = false;
997                        this.expectingStateLabelKey = false;
998                        this.expectingStateLabelContent = false;
999                        this.expectingMatrix = false;
1000                        this.expectingMatrixKey = true;
1001                }
1002
1003                else if (this.expectingTaxLabelValue)
1004                        // Use untoken version to preserve spaces.
1005                        ((CharactersBlockListener) this.getBlockListener())
1006                                        .addTaxLabel(token);
1007
1008                else if (this.expectingCharStateLabelKey) {
1009                        this.currentCharStateLabelKey = token;
1010                        // Use untoken version to preserve spaces.
1011                        ((CharactersBlockListener) this.getBlockListener())
1012                                        .addCharState(token);
1013                        this.expectingCharStateLabelKey = false;
1014                        this.expectingCharStateLabelName = true;
1015                }
1016
1017                else if (this.expectingCharStateLabelName) {
1018                        String actualName = token;
1019                        String firstSynonym = null;
1020                        if (token.indexOf("/") >= 0) {
1021                                actualName = token.substring(0, token.indexOf("/"));
1022                                if (token.indexOf("/") < token.length() - 2)
1023                                        firstSynonym = token.substring(token.indexOf("/") + 1);
1024                        }
1025                        final boolean skipSynonyms = actualName.endsWith(",")
1026                                        || (firstSynonym != null && firstSynonym.endsWith(","));
1027                        if (skipSynonyms) {
1028                                if (firstSynonym != null)
1029                                        firstSynonym = firstSynonym.substring(0, firstSynonym
1030                                                        .length() - 1);
1031                                else
1032                                        actualName = actualName.substring(0,
1033                                                        actualName.length() - 1);
1034                        }
1035                        // Use untoken version to preserve spaces.
1036                        ((CharactersBlockListener) this.getBlockListener())
1037                                        .setCharStateLabel(this.currentCharStateLabelKey,
1038                                                        actualName);
1039                        if (firstSynonym != null)
1040                                ((CharactersBlockListener) this.getBlockListener())
1041                                                .addCharStateKeyword(this.currentCharStateLabelKey,
1042                                                                token);
1043                        this.expectingCharStateLabelName = false;
1044                        if (!skipSynonyms)
1045                                this.expectingCharStateLabelSynonym = true;
1046                        else
1047                                this.expectingCharStateLabelKey = true;
1048                }
1049
1050                else if (this.expectingCharStateLabelSynonym) {
1051                        if (token.startsWith("/") && token.length() > 1)
1052                                token = token.substring(1);
1053                        final boolean skipSynonyms = token.endsWith(",");
1054                        if (skipSynonyms)
1055                                token = token.substring(0, token.length() - 1);
1056                        if (!"/".equals(token))
1057                                // Use untoken version to preserve spaces.
1058                                ((CharactersBlockListener) this.getBlockListener())
1059                                                .addCharStateKeyword(this.currentCharStateLabelKey,
1060                                                                token);
1061                        if (skipSynonyms) {
1062                                this.expectingCharStateLabelSynonym = false;
1063                                this.expectingCharStateLabelKey = true;
1064                        }
1065                }
1066
1067                else if (this.expectingCharLabelValue)
1068                        // Use untoken version to preserve spaces.
1069                        ((CharactersBlockListener) this.getBlockListener())
1070                                        .addCharLabel(token);
1071
1072                else if (this.expectingStateLabelKey) {
1073                        final boolean skipContent = token.endsWith(",");
1074                        if (skipContent)
1075                                token = token.substring(0, token.length() - 1);
1076                        this.currentStateLabelKey = token;
1077                        // Use untoken version to preserve spaces.
1078                        ((CharactersBlockListener) this.getBlockListener()).addState(token);
1079                        if (!skipContent) {
1080                                this.expectingStateLabelKey = false;
1081                                this.expectingStateLabelContent = true;
1082                        }
1083                }
1084
1085                else if (this.expectingStateLabelContent) {
1086                        final boolean skipContent = token.endsWith(",");
1087                        if (skipContent)
1088                                token = token.substring(0, token.length() - 1);
1089                        // Use untoken version to preserve spaces.
1090                        ((CharactersBlockListener) this.getBlockListener()).addStateLabel(
1091                                        this.currentStateLabelKey, token);
1092                        if (skipContent) {
1093                                this.expectingStateLabelKey = true;
1094                                this.expectingStateLabelContent = false;
1095                        }
1096                }
1097
1098                else if (this.expectingMatrixKey) {
1099                        this.currentMatrixKey = token;
1100                        // Use untoken version to preserve spaces.
1101                        ((CharactersBlockListener) this.getBlockListener())
1102                                        .addMatrixEntry(token);
1103                        this.expectingMatrixKey = false;
1104                        this.expectingMatrixContent = true;
1105                        // Update first line info and set up stack for entry.
1106                        if (!this.matrixStack.containsKey(token)) {
1107                                this.matrixStack.put(token, new Stack());
1108                                if (this.matrixPrependNulls > 0)
1109                                        for (int i = 0; i < this.matrixPrependNulls; i++)
1110                                                ((CharactersBlockListener) this.getBlockListener())
1111                                                                .appendMatrixData(this.currentMatrixKey, null);
1112                        }
1113                        if (this.matrixFirstLineKey == null)
1114                                this.matrixFirstLineKey = this.currentMatrixKey;
1115                }
1116
1117                else if (this.expectingMatrixContent) {
1118                        final Stack stack = (Stack) this.matrixStack
1119                                        .get(this.currentMatrixKey);
1120                        if ("(".equals(token)) {
1121                                final List newList = new ArrayList();
1122                                if (!stack.isEmpty())
1123                                        ((Collection) stack.peek()).add(newList);
1124                                else
1125                                        ((CharactersBlockListener) this.getBlockListener())
1126                                                        .appendMatrixData(this.currentMatrixKey, newList);
1127                                stack.push(newList);
1128                        } else if ("{".equals(token)) {
1129                                final Set newSet = new LinkedHashSet();
1130                                if (!stack.isEmpty())
1131                                        ((Collection) stack.peek()).add(newSet);
1132                                else
1133                                        ((CharactersBlockListener) this.getBlockListener())
1134                                                        .appendMatrixData(this.currentMatrixKey, newSet);
1135                                stack.push(newSet);
1136                        } else if (")".equals(token) && !stack.isEmpty()
1137                                        && (stack.peek() instanceof List)) {
1138                                stack.pop();
1139                                if (stack.isEmpty()
1140                                                && this.currentMatrixKey
1141                                                                .equals(this.matrixFirstLineKey))
1142                                        this.matrixFirstLineLength++;
1143                        } else if ("}".equals(token) && !stack.isEmpty()
1144                                        && (stack.peek() instanceof Set)) {
1145                                stack.pop();
1146                                if (stack.isEmpty()
1147                                                && this.currentMatrixKey
1148                                                                .equals(this.matrixFirstLineKey))
1149                                        this.matrixFirstLineLength++;
1150                        } else {
1151                                final boolean reallyUseTokens = (this.tokenizedMatrix || "CONTINUOUS"
1152                                                .equals(this.specifiedDataType))
1153                                                && !("DNA".equals(this.specifiedDataType)
1154                                                                || "RNA".equals(this.specifiedDataType) || "NUCLEOTIDE"
1155                                                                .equals(this.specifiedDataType));
1156                                if (reallyUseTokens) {
1157                                        if (!stack.isEmpty())
1158                                                ((Collection) stack.peek()).add(token);
1159                                        else {
1160                                                ((CharactersBlockListener) this.getBlockListener())
1161                                                                .appendMatrixData(this.currentMatrixKey, token);
1162                                                if (this.currentMatrixKey
1163                                                                .equals(this.matrixFirstLineKey))
1164                                                        this.matrixFirstLineLength++;
1165                                        }
1166                                } else {
1167                                        final String[] toks = token.split("");
1168                                        for (int i = 0; i < toks.length; i++) {
1169                                                final String tok = toks[i];
1170                                                if (!stack.isEmpty())
1171                                                        ((Collection) stack.peek()).add(tok);
1172                                                else {
1173                                                        ((CharactersBlockListener) this.getBlockListener())
1174                                                                        .appendMatrixData(this.currentMatrixKey,
1175                                                                                        tok);
1176                                                        if (this.currentMatrixKey
1177                                                                        .equals(this.matrixFirstLineKey))
1178                                                                this.matrixFirstLineLength++;
1179                                                }
1180                                        }
1181                                }
1182                        }
1183                }
1184
1185                else
1186                        throw new ParseException("Found unexpected token " + token
1187                                        + " in CHARACTERS block");
1188        }
1189}