001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojavax.bio.phylo.io.nexus; 022 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.Collection; 026import java.util.HashMap; 027import java.util.LinkedHashSet; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.Stack; 032 033import org.biojava.bio.seq.io.ParseException; 034 035/** 036 * Parses Nexus characters blocks. 
037 * 038 * @author Richard Holland 039 * @author Tobias Thierer 040 * @author Jim Balhoff 041 * @since 1.6 042 */ 043public class CharactersBlockParser extends NexusBlockParser.Abstract { 044 045 private boolean expectingDimension; 046 047 private boolean expectingNewTaxa; 048 049 private boolean expectingNTax; 050 051 private boolean expectingNTaxEquals; 052 053 private boolean expectingNTaxValue; 054 055 private boolean expectingNChar; 056 057 private boolean expectingNCharEquals; 058 059 private boolean expectingNCharValue; 060 061 private boolean expectingFormat; 062 063 private boolean expectingEliminate; 064 065 private boolean expectingTaxLabel; 066 067 private boolean expectingTaxLabelValue; 068 069 private boolean expectingCharStateLabel; 070 071 private boolean expectingCharLabel; 072 073 private boolean expectingStateLabel; 074 075 private boolean expectingMatrix; 076 077 private boolean expectingDataType; 078 079 private boolean expectingDataTypeEquals; 080 081 private boolean expectingDataTypeContent; 082 083 private boolean expectingRespectCase; 084 085 private boolean expectingMissing; 086 087 private boolean expectingMissingEquals; 088 089 private boolean expectingMissingContent; 090 091 private boolean expectingGap; 092 093 private boolean expectingGapEquals; 094 095 private boolean expectingGapContent; 096 097 private boolean expectingSymbols; 098 099 private boolean expectingSymbolsEquals; 100 101 private boolean expectingSymbolsContent; 102 103 private boolean expectingEquate; 104 105 private boolean expectingEquateEquals; 106 107 private boolean expectingEquateContent; 108 109 private boolean expectingMatchChar; 110 111 private boolean expectingMatchCharEquals; 112 113 private boolean expectingMatchCharContent; 114 115 private boolean expectingLabels; 116 117 private boolean expectingTranspose; 118 119 private boolean expectingInterleave; 120 121 private boolean expectingItems; 122 123 private boolean expectingItemsEquals; 124 125 private 
boolean expectingItemsContent; 126 127 private boolean itemsInBrackets; 128 129 private boolean expectingStatesFormat; 130 131 private boolean expectingStatesFormatEquals; 132 133 private boolean expectingStatesFormatContent; 134 135 private boolean expectingTokens; 136 137 private String specifiedDataType; 138 139 private boolean tokenizedMatrix; 140 141 private boolean expectingEliminateRange; 142 143 private boolean expectingCharStateLabelKey; 144 145 private boolean expectingCharStateLabelName; 146 147 private boolean expectingCharStateLabelSynonym; 148 149 private boolean expectingCharLabelValue; 150 151 private boolean expectingStateLabelKey; 152 153 private boolean expectingStateLabelContent; 154 155 private boolean expectingMatrixKey; 156 157 private boolean expectingMatrixContent; 158 159 private String currentCharStateLabelKey; 160 161 private String currentStateLabelKey; 162 163 private String currentMatrixKey; 164 165 private List currentMatrixBracket; 166 167 private Map matrixStack = new HashMap(); 168 169 private int matrixFirstLineLength; 170 171 private String matrixFirstLineKey; 172 173 private int matrixPrependNulls; 174 175 private boolean seenSymbol; 176 177 /** 178 * Creates a characters block parser by passing the listener straight to the 179 * NexusBlockParser.Abstract constructor. The constructor body does nothing else. 180 * @param blockListener 181 * the listener to send parse events to. 
182 */ 183 public CharactersBlockParser(CharactersBlockListener blockListener) { 184 super(blockListener); 185 } 186 /** Returns the parser to its start-of-block state. Only expectingDimension is set to true; every other expecting* flag, itemsInBrackets, tokenizedMatrix and seenSymbol are set to false; specifiedDataType, the three current*Key fields, currentMatrixBracket and matrixFirstLineKey are set to null; matrixFirstLineLength and matrixPrependNulls are set to 0; and matrixStack is emptied in place. */ 187 public void resetStatus() { 188 this.expectingDimension = true; /* DIMENSIONS is the first keyword parseToken looks for */ 189 this.expectingNewTaxa = false; 190 this.expectingNTax = false; 191 this.expectingNTaxEquals = false; 192 this.expectingNTaxValue = false; 193 this.expectingNChar = false; 194 this.expectingNCharEquals = false; 195 this.expectingNCharValue = false; 196 this.expectingFormat = false; 197 this.expectingEliminate = false; 198 this.expectingTaxLabel = false; 199 this.expectingTaxLabelValue = false; 200 this.expectingCharStateLabel = false; 201 this.expectingCharLabel = false; 202 this.expectingStateLabel = false; 203 this.expectingMatrix = false; 204 this.tokenizedMatrix = false; 205 this.specifiedDataType = null; 206 this.expectingDataType = false; 207 this.expectingDataTypeEquals = false; 208 this.expectingDataTypeContent = false; 209 this.expectingRespectCase = false; 210 this.expectingMissing = false; 211 this.expectingMissingEquals = false; 212 this.expectingMissingContent = false; 213 this.expectingGap = false; 214 this.expectingGapEquals = false; 215 this.expectingGapContent = false; 216 this.expectingSymbols = false; 217 this.expectingSymbolsEquals = false; 218 this.expectingSymbolsContent = false; 219 this.expectingEquate = false; 220 this.expectingEquateEquals = false; 221 this.expectingEquateContent = false; 222 this.expectingMatchChar = false; 223 this.expectingMatchCharEquals = false; 224 this.expectingMatchCharContent = false; 225 this.expectingLabels = false; 226 this.expectingTranspose = false; 227 this.expectingInterleave = false; 228 this.expectingItems = false; 229 this.expectingItemsEquals = false; 230 this.expectingItemsContent = false; 231 this.itemsInBrackets = false; 232 this.expectingStatesFormat = false; 233 this.expectingStatesFormatEquals = false; 234 this.expectingStatesFormatContent = false; 235 this.expectingTokens = false; 236 
this.expectingEliminateRange = false; 237 this.expectingCharStateLabelKey = false; 238 this.expectingCharStateLabelName = false; 239 this.expectingCharStateLabelSynonym = false; 240 this.expectingCharLabelValue = false; 241 this.expectingStateLabelKey = false; 242 this.expectingStateLabelContent = false; 243 this.expectingMatrixKey = false; 244 this.expectingMatrixContent = false; 245 this.currentCharStateLabelKey = null; 246 this.currentStateLabelKey = null; 247 this.currentMatrixKey = null; 248 this.currentMatrixBracket = null; 249 this.matrixStack.clear(); /* the existing map instance is kept and emptied, not replaced */ 250 this.matrixFirstLineKey = null; 251 this.matrixFirstLineLength = 0; 252 this.matrixPrependNulls = 0; 253 this.seenSymbol = false; 254 } 255 /** Returns the current value of the expectingMatrixContent flag. In the visible part of parseToken that flag is cleared when a newline token arrives while matrix content is expected. NOTE(review): presumably the tokenizer calls this to decide whether brackets and braces are delivered as tokens; confirm against NexusBlockParser. */ 256 public boolean wantsBracketsAndBraces() { 257 return this.expectingMatrixContent; 258 } 259 260 public void parseToken(String token) throws ParseException { 261 if (this.expectingMatrixContent && "\n".equals(token)) { 262 // Special handling for new lines inside matrix data. 263 if (this.currentMatrixBracket != null) { 264 ((CharactersBlockListener) this.getBlockListener()) 265 .appendMatrixData(this.currentMatrixKey, 266 this.currentMatrixBracket); 267 this.currentMatrixBracket = null; 268 } 269 this.expectingMatrixContent = false; 270 this.expectingMatrixKey = true; 271 } else if (this.expectingMatrixKey && "\n".equals(token)) { 272 if (this.matrixFirstLineKey != null) 273 this.matrixPrependNulls = this.matrixFirstLineLength; 274 } else if (token.trim().length() == 0) 275 return; 276 else if (this.expectingDimension 277 && "DIMENSIONS".equalsIgnoreCase(token)) { 278 this.expectingDimension = false; 279 this.expectingNewTaxa = true; 280 this.expectingNTax = true; 281 this.expectingNChar = true; 282 } else if (this.expectingNewTaxa && "NEWTAXA".equalsIgnoreCase(token)) { 283 this.expectingNewTaxa = false; 284 this.expectingNTax = true; 285 this.expectingNChar = false; 286 } else if (this.expectingNTax && token.toUpperCase().startsWith("NTAX")) { 287 
this.expectingNewTaxa = false; 288 this.expectingNTax = false; 289 if (token.indexOf('=') >= 0) { 290 final String[] parts = token.split("="); 291 if (parts.length > 1) { 292 this.expectingNChar = true; 293 try { 294 ((CharactersBlockListener) this.getBlockListener()) 295 .setDimensionsNTax(Integer.parseInt(parts[1])); 296 } catch (NumberFormatException e) { 297 throw new ParseException("Invalid NTAX value: " 298 + parts[1]); 299 } 300 } else 301 this.expectingNTaxValue = true; 302 } else 303 this.expectingNTaxEquals = true; 304 } else if (this.expectingNTaxEquals && token.startsWith("=")) { 305 this.expectingNTaxEquals = false; 306 final String[] parts = token.split("="); 307 if (parts.length > 1) { 308 this.expectingNChar = true; 309 try { 310 ((CharactersBlockListener) this.getBlockListener()) 311 .setDimensionsNTax(Integer.parseInt(parts[1])); 312 } catch (NumberFormatException e) { 313 throw new ParseException("Invalid NTAX value: " + parts[1]); 314 } 315 } else 316 this.expectingNTaxValue = true; 317 } else if (this.expectingNTaxValue) { 318 this.expectingNTaxValue = false; 319 try { 320 ((CharactersBlockListener) this.getBlockListener()) 321 .setDimensionsNTax(Integer.parseInt(token)); 322 } catch (NumberFormatException e) { 323 throw new ParseException("Invalid NTAX value: " + token); 324 } 325 this.expectingNChar = true; 326 } else if (this.expectingNChar 327 && token.toUpperCase().startsWith("NCHAR")) { 328 this.expectingNChar = false; 329 if (token.indexOf('=') >= 0) { 330 final String[] parts = token.split("="); 331 if (parts.length > 1) { 332 this.expectingFormat = true; 333 this.expectingEliminate = true; 334 this.expectingTaxLabel = true; 335 this.expectingCharStateLabel = true; 336 this.expectingCharLabel = true; 337 this.expectingStateLabel = true; 338 this.expectingMatrix = true; 339 try { 340 ((CharactersBlockListener) this.getBlockListener()) 341 .setDimensionsNChar(Integer.parseInt(parts[1])); 342 } catch (NumberFormatException e) { 343 throw 
new ParseException("Invalid NCHAR value: " 344 + parts[1]); 345 } 346 } else 347 this.expectingNCharValue = true; 348 } else 349 this.expectingNCharEquals = true; 350 } else if (this.expectingNCharEquals && token.startsWith("=")) { 351 this.expectingNCharEquals = false; 352 final String[] parts = token.split("="); 353 if (parts.length > 1) { 354 this.expectingFormat = true; 355 this.expectingEliminate = true; 356 this.expectingTaxLabel = true; 357 this.expectingCharStateLabel = true; 358 this.expectingCharLabel = true; 359 this.expectingStateLabel = true; 360 this.expectingMatrix = true; 361 try { 362 ((CharactersBlockListener) this.getBlockListener()) 363 .setDimensionsNChar(Integer.parseInt(parts[1])); 364 } catch (NumberFormatException e) { 365 throw new ParseException("Invalid NCHAR value: " + parts[1]); 366 } 367 } else 368 this.expectingNCharValue = true; 369 } else if (this.expectingNCharValue) { 370 this.expectingNCharValue = false; 371 try { 372 ((CharactersBlockListener) this.getBlockListener()) 373 .setDimensionsNChar(Integer.parseInt(token)); 374 } catch (NumberFormatException e) { 375 throw new ParseException("Invalid NCHAR value: " + token); 376 } 377 this.expectingFormat = true; 378 this.expectingEliminate = true; 379 this.expectingTaxLabel = true; 380 this.expectingCharStateLabel = true; 381 this.expectingCharLabel = true; 382 this.expectingStateLabel = true; 383 this.expectingMatrix = true; 384 } 385 386 else if (this.expectingFormat && "FORMAT".equalsIgnoreCase(token)) { 387 this.expectingFormat = false; 388 this.expectingDataType = true; 389 this.expectingRespectCase = true; 390 this.expectingMissing = true; 391 this.expectingGap = true; 392 this.expectingSymbols = true; 393 this.expectingEquate = true; 394 this.expectingMatchChar = true; 395 this.expectingLabels = true; 396 this.expectingTranspose = true; 397 this.expectingInterleave = true; 398 this.expectingItems = true; 399 this.expectingStatesFormat = true; 400 this.expectingTokens = true; 
401 } 402 403 else if (this.expectingDataType 404 && token.toUpperCase().startsWith("DATATYPE")) { 405 this.expectingDataType = false; 406 407 if (token.indexOf("=") >= 0) { 408 final String[] parts = token.split("="); 409 if (parts.length > 1) { 410 this.specifiedDataType = parts[1]; 411 ((CharactersBlockListener) this.getBlockListener()) 412 .setDataType(parts[1]); 413 } else 414 this.expectingDataTypeContent = true; 415 } else 416 this.expectingDataTypeEquals = true; 417 } 418 419 else if (this.expectingDataTypeEquals && token.startsWith("=")) { 420 this.expectingDataTypeEquals = false; 421 if (token.length() > 1) { 422 token = token.substring(1); 423 this.specifiedDataType = token; 424 ((CharactersBlockListener) this.getBlockListener()) 425 .setDataType(token); 426 } else 427 this.expectingDataTypeContent = true; 428 } 429 430 else if (this.expectingDataTypeContent) { 431 this.specifiedDataType = token; 432 ((CharactersBlockListener) this.getBlockListener()) 433 .setDataType(token); 434 this.expectingDataTypeContent = false; 435 } 436 437 else if (this.expectingRespectCase 438 && "RESPECTCASE".equalsIgnoreCase(token)) { 439 ((CharactersBlockListener) this.getBlockListener()) 440 .setRespectCase(true); 441 this.expectingRespectCase = false; 442 } 443 444 else if (this.expectingMissing 445 && token.toUpperCase().startsWith("MISSING")) { 446 this.expectingMissing = false; 447 448 if (token.indexOf("=") >= 0) { 449 final String[] parts = token.split("="); 450 if (parts.length > 1) 451 ((CharactersBlockListener) this.getBlockListener()) 452 .setMissing(parts[1]); 453 else 454 this.expectingMissingContent = true; 455 } else 456 this.expectingMissingEquals = true; 457 } 458 459 else if (this.expectingMissingEquals && token.startsWith("=")) { 460 this.expectingMissingEquals = false; 461 if (token.length() > 1) 462 ((CharactersBlockListener) this.getBlockListener()) 463 .setMissing(token.substring(1)); 464 else 465 this.expectingMissingContent = true; 466 } 467 468 else 
if (this.expectingMissingContent) { 469 ((CharactersBlockListener) this.getBlockListener()) 470 .setMissing(token); 471 this.expectingMissingContent = false; 472 } 473 474 else if (this.expectingGap && token.toUpperCase().startsWith("GAP")) { 475 this.expectingGap = false; 476 477 if (token.indexOf("=") >= 0) { 478 final String[] parts = token.split("="); 479 if (parts.length > 1) 480 ((CharactersBlockListener) this.getBlockListener()) 481 .setGap(parts[1]); 482 else 483 this.expectingGapContent = true; 484 } else 485 this.expectingGapEquals = true; 486 } 487 488 else if (this.expectingGapEquals && token.startsWith("=")) { 489 this.expectingGapEquals = false; 490 if (token.length() > 1) 491 ((CharactersBlockListener) this.getBlockListener()) 492 .setGap(token.substring(1)); 493 else 494 this.expectingGapContent = true; 495 } 496 497 else if (this.expectingGapContent) { 498 ((CharactersBlockListener) this.getBlockListener()).setGap(token); 499 this.expectingGapContent = false; 500 } 501 502 else if (this.expectingSymbols 503 && token.toUpperCase().startsWith("SYMBOLS")) { 504 this.expectingSymbols = false; 505 506 if (token.indexOf("=") >= 0) { 507 final String[] parts = token.split("="); 508 if (parts.length > 1) { 509 if (!parts[1].startsWith("\"")) 510 throw new ParseException( 511 "Symbols string must start with '\"'"); 512 parts[1] = parts[1].substring(1); 513 this.expectingSymbolsContent = true; 514 if (parts[1].endsWith("\"")) { 515 parts[1] = parts[1].substring(0, parts[1].length() - 1); 516 this.expectingSymbolsContent = false; 517 } 518 for (int i = 0; i < parts[1].length(); i++) 519 ((CharactersBlockListener) this.getBlockListener()) 520 .addSymbol("" + parts[1].charAt(i)); 521 } else 522 this.expectingSymbolsContent = true; 523 } else 524 this.expectingSymbolsEquals = true; 525 } 526 527 else if (this.expectingSymbolsEquals && token.startsWith("=")) { 528 this.expectingSymbolsEquals = false; 529 if (token.length() > 1) { 530 token = token.substring(1); 
531 if (!token.startsWith("\"")) 532 throw new ParseException( 533 "Symbols string must start with '\"'"); 534 token = token.substring(1); 535 this.expectingSymbolsContent = true; 536 537 if (token.endsWith("\"")) { 538 token = token.substring(0, token.length() - 1); 539 this.expectingSymbolsContent = false; 540 } 541 for (int i = 0; i < token.length(); i++) 542 ((CharactersBlockListener) this.getBlockListener()) 543 .addSymbol("" + token.charAt(i)); 544 } else 545 this.expectingSymbolsContent = true; 546 } 547 548 else if (this.expectingSymbolsContent) { 549 if (token.startsWith("\"")) 550 token = token.substring(1); 551 if (token.endsWith("\"")) { 552 token = token.substring(0, token.length() - 1); 553 this.expectingSymbolsContent = false; 554 } 555 if (token.equals("")) 556 this.expectingSymbolsContent = !this.seenSymbol; 557 else { 558 for (int i = 0; i < token.length(); i++) 559 ((CharactersBlockListener) this.getBlockListener()) 560 .addSymbol("" + token.charAt(i)); 561 this.seenSymbol = true; 562 } 563 } 564 565 else if (this.expectingEquate 566 && token.toUpperCase().startsWith("EQUATE")) { 567 this.expectingEquate = false; 568 569 if (token.indexOf("=") >= 0) { 570 final String[] parts = token.split("="); 571 if (parts.length > 1) { 572 if (!parts[1].startsWith("\"")) 573 throw new ParseException( 574 "Symbols string must start with '\"'"); 575 parts[1] = parts[1].substring(1); 576 this.expectingEquateContent = true; 577 if (parts[1].endsWith("\"")) { 578 parts[1] = parts[1].substring(0, parts[1].length() - 1); 579 this.expectingEquateContent = false; 580 } 581 final String[] subParts = parts[1].split("="); 582 final String symbol = subParts[0]; 583 final StringBuffer text = new StringBuffer(); 584 for (int i = 1; i < subParts.length; i++) { 585 if (i >= 2) 586 text.append('='); 587 text.append(subParts[i]); 588 } 589 final List symbols = new ArrayList(); 590 if (text.charAt(0) == '(') 591 symbols.addAll(Arrays.asList(text.substring(1, 592 text.length() - 
2).split(""))); 593 else 594 symbols 595 .addAll(Arrays 596 .asList(text.toString().split(""))); 597 ((CharactersBlockListener) this.getBlockListener()) 598 .addEquate(symbol, symbols); 599 } else 600 this.expectingEquateContent = true; 601 } else 602 this.expectingEquateEquals = true; 603 } 604 605 else if (this.expectingEquateEquals && token.startsWith("=")) { 606 this.expectingEquateEquals = false; 607 if (token.length() > 1) { 608 token = token.substring(1); 609 if (!token.startsWith("\"")) 610 throw new ParseException( 611 "Symbols string must start with '\"'"); 612 token = token.substring(1); 613 this.expectingEquateContent = true; 614 615 if (token.endsWith("\"")) { 616 token = token.substring(0, token.length() - 1); 617 this.expectingEquateContent = false; 618 } 619 final String[] subParts = token.split("="); 620 final String symbol = subParts[0]; 621 final StringBuffer text = new StringBuffer(); 622 for (int i = 1; i < subParts.length; i++) { 623 if (i >= 2) 624 text.append('='); 625 text.append(subParts[i]); 626 } 627 final List symbols = new ArrayList(); 628 if (text.charAt(0) == '(') 629 symbols.addAll(Arrays.asList(text.substring(1, 630 text.length() - 2).split(""))); 631 else 632 symbols.addAll(Arrays.asList(text.toString().split(""))); 633 ((CharactersBlockListener) this.getBlockListener()).addEquate( 634 symbol, symbols); 635 } else 636 this.expectingEquateContent = true; 637 } 638 639 else if (this.expectingEquateContent) { 640 if (token.startsWith("\"")) 641 token = token.substring(1); 642 if (token.endsWith("\"")) { 643 token = token.substring(0, token.length() - 1); 644 this.expectingEquateContent = false; 645 } 646 final String[] subParts = token.split("="); 647 final String symbol = subParts[0]; 648 final StringBuffer text = new StringBuffer(); 649 for (int i = 1; i < subParts.length; i++) { 650 if (i >= 2) 651 text.append('='); 652 text.append(subParts[i]); 653 } 654 final List symbols = new ArrayList(); 655 if (text.charAt(0) == '(') 656 
symbols.addAll(Arrays.asList(text.substring(1, 657 text.length() - 2).split(""))); 658 else 659 symbols.addAll(Arrays.asList(text.toString().split(""))); 660 ((CharactersBlockListener) this.getBlockListener()).addEquate( 661 symbol, symbols); 662 } 663 664 else if (this.expectingMatchChar 665 && token.toUpperCase().startsWith("MATCHCHAR")) { 666 this.expectingMatchChar = false; 667 668 if (token.indexOf("=") >= 0) { 669 final String[] parts = token.split("="); 670 if (parts.length > 1) 671 ((CharactersBlockListener) this.getBlockListener()) 672 .setMatchChar(parts[1]); 673 else 674 this.expectingMatchCharContent = true; 675 } else 676 this.expectingMatchCharEquals = true; 677 } 678 679 else if (this.expectingMatchCharEquals && token.startsWith("=")) { 680 this.expectingMatchCharEquals = false; 681 if (token.length() > 1) 682 ((CharactersBlockListener) this.getBlockListener()) 683 .setMatchChar(token.substring(1)); 684 else 685 this.expectingMatchCharContent = true; 686 } 687 688 else if (this.expectingMatchCharContent) { 689 ((CharactersBlockListener) this.getBlockListener()) 690 .setMatchChar(token); 691 this.expectingMatchCharContent = false; 692 } 693 694 else if (this.expectingLabels && "LABELS".equalsIgnoreCase(token)) { 695 ((CharactersBlockListener) this.getBlockListener()).setLabels(true); 696 this.expectingLabels = false; 697 } 698 699 else if (this.expectingLabels && "NOLABELS".equalsIgnoreCase(token)) { 700 ((CharactersBlockListener) this.getBlockListener()) 701 .setLabels(false); 702 this.expectingLabels = false; 703 } 704 705 else if (this.expectingTranspose && "TRANSPOSE".equalsIgnoreCase(token)) { 706 ((CharactersBlockListener) this.getBlockListener()) 707 .setTransposed(true); 708 this.expectingTranspose = false; 709 } 710 711 else if (this.expectingInterleave 712 && token.toUpperCase().startsWith("INTERLEAVE")) { 713 boolean interleaved = true; 714 if (token.indexOf("=") >= 0) { 715 final String[] parts = token.split("="); 716 if (parts.length > 1) 
{ 717 if (!("YES".equalsIgnoreCase(parts[1]) || "TRUE".equalsIgnoreCase(parts[1]))) { 718 interleaved = false; 719 } 720 } 721 } 722 ((CharactersBlockListener) this.getBlockListener()) 723 .setInterleaved(interleaved); 724 this.expectingInterleave = false; 725 } 726 727 else if (this.expectingItems && token.toUpperCase().startsWith("ITEMS")) { 728 this.expectingItems = false; 729 730 if (token.indexOf("=") >= 0) { 731 final String[] parts = token.split("="); 732 if (parts.length > 1) { 733 if (parts[1].startsWith("(")) { 734 parts[1] = parts[1].substring(1); 735 this.itemsInBrackets = true; 736 this.expectingItemsContent = true; 737 } 738 if (parts[1].endsWith(")")) { 739 parts[1] = parts[1].substring(0, parts[1].length() - 1); 740 this.itemsInBrackets = false; 741 this.expectingItemsContent = false; 742 } 743 ((CharactersBlockListener) this.getBlockListener()) 744 .setStatesFormat(parts[1]); 745 } else 746 this.expectingItemsContent = true; 747 } else 748 this.expectingItemsEquals = true; 749 } 750 751 else if (this.expectingItemsEquals && token.startsWith("=")) { 752 this.expectingItemsEquals = false; 753 if (token.length() > 1) { 754 token = token.substring(1); 755 if (token.startsWith("(")) { 756 token = token.substring(1); 757 this.itemsInBrackets = true; 758 this.expectingItemsContent = true; 759 } 760 if (token.endsWith(")")) { 761 token = token.substring(0, token.length() - 1); 762 this.itemsInBrackets = false; 763 this.expectingItemsContent = false; 764 } 765 ((CharactersBlockListener) this.getBlockListener()) 766 .setStatesFormat(token); 767 } else 768 this.expectingItemsContent = true; 769 } 770 771 else if (this.expectingItemsContent) { 772 if (token.startsWith("(")) { 773 token = token.substring(1); 774 this.itemsInBrackets = true; 775 this.expectingItemsContent = true; 776 } 777 if (token.endsWith(")")) { 778 token = token.substring(0, token.length() - 1); 779 this.itemsInBrackets = false; 780 this.expectingItemsContent = false; 781 } 782 
((CharactersBlockListener) this.getBlockListener()) 783 .setStatesFormat(token); 784 this.expectingItemsContent = this.itemsInBrackets; 785 } 786 787 else if (this.expectingStatesFormat 788 && token.toUpperCase().startsWith("STATESFORMAT")) { 789 this.expectingStatesFormat = false; 790 791 if (token.indexOf("=") >= 0) { 792 final String[] parts = token.split("="); 793 if (parts.length > 1) 794 ((CharactersBlockListener) this.getBlockListener()) 795 .setStatesFormat(parts[1]); 796 else 797 this.expectingStatesFormatContent = true; 798 } else 799 this.expectingStatesFormatEquals = true; 800 } 801 802 else if (this.expectingStatesFormatEquals && token.startsWith("=")) { 803 this.expectingStatesFormatEquals = false; 804 if (token.length() > 1) 805 ((CharactersBlockListener) this.getBlockListener()) 806 .setStatesFormat(token.substring(1)); 807 else 808 this.expectingStatesFormatContent = true; 809 } 810 811 else if (this.expectingStatesFormatContent) { 812 ((CharactersBlockListener) this.getBlockListener()) 813 .setStatesFormat(token); 814 this.expectingStatesFormatContent = false; 815 } 816 817 else if (this.expectingTokens && "TOKENS".equalsIgnoreCase(token)) { 818 ((CharactersBlockListener) this.getBlockListener()).setTokens(true); 819 this.expectingTokens = false; 820 this.tokenizedMatrix = true; 821 } 822 823 else if (this.expectingTokens && "NOTOKENS".equalsIgnoreCase(token)) { 824 ((CharactersBlockListener) this.getBlockListener()) 825 .setTokens(false); 826 this.expectingTokens = false; 827 this.tokenizedMatrix = false; 828 } 829 830 else if (this.expectingEliminate && "ELIMINATE".equalsIgnoreCase(token)) { 831 this.expectingFormat = false; 832 this.expectingDataType = false; 833 this.expectingRespectCase = false; 834 this.expectingMissing = false; 835 this.expectingGap = false; 836 this.expectingSymbols = false; 837 this.expectingEquate = false; 838 this.expectingMatchChar = false; 839 this.expectingLabels = false; 840 this.expectingTranspose = false; 841 
this.expectingInterleave = false; 842 this.expectingItems = false; 843 this.expectingStatesFormat = false; 844 this.expectingTokens = false; 845 this.expectingEliminate = false; 846 this.expectingEliminateRange = true; 847 } 848 849 else if (this.expectingEliminateRange) { 850 final String parts[] = token.split("-"); 851 if (parts.length != 2) 852 throw new ParseException("Eliminate range " + token 853 + " not in form X-Y"); 854 try { 855 final int eliminateStart = Integer.parseInt(parts[0]); 856 final int eliminateEnd = Integer.parseInt(parts[1]); 857 ((CharactersBlockListener) this.getBlockListener()) 858 .setEliminateStart(eliminateStart); 859 ((CharactersBlockListener) this.getBlockListener()) 860 .setEliminateEnd(eliminateEnd); 861 } catch (NumberFormatException e) { 862 throw new ParseException("Values in eliminate range " + token 863 + " not parseable integers"); 864 } 865 this.expectingEliminateRange = false; 866 } 867 868 else if (this.expectingTaxLabel && "TAXLABELS".equalsIgnoreCase(token)) { 869 this.expectingFormat = false; 870 this.expectingDataType = false; 871 this.expectingRespectCase = false; 872 this.expectingMissing = false; 873 this.expectingGap = false; 874 this.expectingSymbols = false; 875 this.expectingEquate = false; 876 this.expectingMatchChar = false; 877 this.expectingLabels = false; 878 this.expectingTranspose = false; 879 this.expectingInterleave = false; 880 this.expectingItems = false; 881 this.expectingStatesFormat = false; 882 this.expectingTokens = false; 883 this.expectingEliminate = false; 884 this.expectingEliminateRange = false; 885 this.expectingTaxLabel = false; 886 this.expectingTaxLabelValue = true; 887 } 888 889 else if (this.expectingCharStateLabel 890 && "CHARSTATELABELS".equalsIgnoreCase(token)) { 891 this.expectingFormat = false; 892 this.expectingDataType = false; 893 this.expectingRespectCase = false; 894 this.expectingMissing = false; 895 this.expectingGap = false; 896 this.expectingSymbols = false; 897 
this.expectingEquate = false; 898 this.expectingMatchChar = false; 899 this.expectingLabels = false; 900 this.expectingTranspose = false; 901 this.expectingInterleave = false; 902 this.expectingItems = false; 903 this.expectingStatesFormat = false; 904 this.expectingTokens = false; 905 this.expectingEliminate = false; 906 this.expectingEliminateRange = false; 907 this.expectingTaxLabel = false; 908 this.expectingTaxLabelValue = false; 909 this.expectingCharStateLabel = false; 910 this.expectingCharStateLabelKey = true; 911 } 912 913 else if (this.expectingCharLabel 914 && "CHARLABELS".equalsIgnoreCase(token)) { 915 this.expectingFormat = false; 916 this.expectingDataType = false; 917 this.expectingRespectCase = false; 918 this.expectingMissing = false; 919 this.expectingGap = false; 920 this.expectingSymbols = false; 921 this.expectingEquate = false; 922 this.expectingMatchChar = false; 923 this.expectingLabels = false; 924 this.expectingTranspose = false; 925 this.expectingInterleave = false; 926 this.expectingItems = false; 927 this.expectingStatesFormat = false; 928 this.expectingTokens = false; 929 this.expectingEliminate = false; 930 this.expectingEliminateRange = false; 931 this.expectingTaxLabel = false; 932 this.expectingTaxLabelValue = false; 933 this.expectingCharStateLabel = false; 934 this.expectingCharStateLabelKey = false; 935 this.expectingCharStateLabelName = false; 936 this.expectingCharStateLabelSynonym = false; 937 this.expectingCharLabel = false; 938 this.expectingCharLabelValue = true; 939 } 940 941 else if (this.expectingStateLabel 942 && "STATELABELS".equalsIgnoreCase(token)) { 943 this.expectingFormat = false; 944 this.expectingDataType = false; 945 this.expectingRespectCase = false; 946 this.expectingMissing = false; 947 this.expectingGap = false; 948 this.expectingSymbols = false; 949 this.expectingEquate = false; 950 this.expectingMatchChar = false; 951 this.expectingLabels = false; 952 this.expectingTranspose = false; 953 
this.expectingInterleave = false; 954 this.expectingItems = false; 955 this.expectingStatesFormat = false; 956 this.expectingTokens = false; 957 this.expectingEliminate = false; 958 this.expectingEliminateRange = false; 959 this.expectingTaxLabel = false; 960 this.expectingTaxLabelValue = false; 961 this.expectingCharStateLabel = false; 962 this.expectingCharStateLabelKey = false; 963 this.expectingCharStateLabelName = false; 964 this.expectingCharStateLabelSynonym = false; 965 this.expectingCharLabel = false; 966 this.expectingCharLabelValue = false; 967 this.expectingStateLabel = false; 968 this.expectingStateLabelKey = true; 969 } 970 971 else if (this.expectingMatrix && "MATRIX".equalsIgnoreCase(token)) { 972 this.expectingFormat = false; 973 this.expectingDataType = false; 974 this.expectingRespectCase = false; 975 this.expectingMissing = false; 976 this.expectingGap = false; 977 this.expectingSymbols = false; 978 this.expectingEquate = false; 979 this.expectingMatchChar = false; 980 this.expectingLabels = false; 981 this.expectingTranspose = false; 982 this.expectingInterleave = false; 983 this.expectingItems = false; 984 this.expectingStatesFormat = false; 985 this.expectingTokens = false; 986 this.expectingEliminate = false; 987 this.expectingEliminateRange = false; 988 this.expectingTaxLabel = false; 989 this.expectingTaxLabelValue = false; 990 this.expectingCharStateLabel = false; 991 this.expectingCharStateLabelKey = false; 992 this.expectingCharStateLabelName = false; 993 this.expectingCharStateLabelSynonym = false; 994 this.expectingCharLabel = false; 995 this.expectingCharLabelValue = false; 996 this.expectingStateLabel = false; 997 this.expectingStateLabelKey = false; 998 this.expectingStateLabelContent = false; 999 this.expectingMatrix = false; 1000 this.expectingMatrixKey = true; 1001 } 1002 1003 else if (this.expectingTaxLabelValue) 1004 // Use untoken version to preserve spaces. 
1005 ((CharactersBlockListener) this.getBlockListener()) 1006 .addTaxLabel(token); 1007 1008 else if (this.expectingCharStateLabelKey) { 1009 this.currentCharStateLabelKey = token; 1010 // Use untoken version to preserve spaces. 1011 ((CharactersBlockListener) this.getBlockListener()) 1012 .addCharState(token); 1013 this.expectingCharStateLabelKey = false; 1014 this.expectingCharStateLabelName = true; 1015 } 1016 1017 else if (this.expectingCharStateLabelName) { 1018 String actualName = token; 1019 String firstSynonym = null; 1020 if (token.indexOf("/") >= 0) { 1021 actualName = token.substring(0, token.indexOf("/")); 1022 if (token.indexOf("/") < token.length() - 2) 1023 firstSynonym = token.substring(token.indexOf("/") + 1); 1024 } 1025 final boolean skipSynonyms = actualName.endsWith(",") 1026 || (firstSynonym != null && firstSynonym.endsWith(",")); 1027 if (skipSynonyms) { 1028 if (firstSynonym != null) 1029 firstSynonym = firstSynonym.substring(0, firstSynonym 1030 .length() - 1); 1031 else 1032 actualName = actualName.substring(0, 1033 actualName.length() - 1); 1034 } 1035 // Use untoken version to preserve spaces. 1036 ((CharactersBlockListener) this.getBlockListener()) 1037 .setCharStateLabel(this.currentCharStateLabelKey, 1038 actualName); 1039 if (firstSynonym != null) 1040 ((CharactersBlockListener) this.getBlockListener()) 1041 .addCharStateKeyword(this.currentCharStateLabelKey, 1042 token); 1043 this.expectingCharStateLabelName = false; 1044 if (!skipSynonyms) 1045 this.expectingCharStateLabelSynonym = true; 1046 else 1047 this.expectingCharStateLabelKey = true; 1048 } 1049 1050 else if (this.expectingCharStateLabelSynonym) { 1051 if (token.startsWith("/") && token.length() > 1) 1052 token = token.substring(1); 1053 final boolean skipSynonyms = token.endsWith(","); 1054 if (skipSynonyms) 1055 token = token.substring(0, token.length() - 1); 1056 if (!"/".equals(token)) 1057 // Use untoken version to preserve spaces. 
1058 ((CharactersBlockListener) this.getBlockListener()) 1059 .addCharStateKeyword(this.currentCharStateLabelKey, 1060 token); 1061 if (skipSynonyms) { 1062 this.expectingCharStateLabelSynonym = false; 1063 this.expectingCharStateLabelKey = true; 1064 } 1065 } 1066 1067 else if (this.expectingCharLabelValue) 1068 // Use untoken version to preserve spaces. 1069 ((CharactersBlockListener) this.getBlockListener()) 1070 .addCharLabel(token); 1071 1072 else if (this.expectingStateLabelKey) { 1073 final boolean skipContent = token.endsWith(","); 1074 if (skipContent) 1075 token = token.substring(0, token.length() - 1); 1076 this.currentStateLabelKey = token; 1077 // Use untoken version to preserve spaces. 1078 ((CharactersBlockListener) this.getBlockListener()).addState(token); 1079 if (!skipContent) { 1080 this.expectingStateLabelKey = false; 1081 this.expectingStateLabelContent = true; 1082 } 1083 } 1084 1085 else if (this.expectingStateLabelContent) { 1086 final boolean skipContent = token.endsWith(","); 1087 if (skipContent) 1088 token = token.substring(0, token.length() - 1); 1089 // Use untoken version to preserve spaces. 1090 ((CharactersBlockListener) this.getBlockListener()).addStateLabel( 1091 this.currentStateLabelKey, token); 1092 if (skipContent) { 1093 this.expectingStateLabelKey = true; 1094 this.expectingStateLabelContent = false; 1095 } 1096 } 1097 1098 else if (this.expectingMatrixKey) { 1099 this.currentMatrixKey = token; 1100 // Use untoken version to preserve spaces. 1101 ((CharactersBlockListener) this.getBlockListener()) 1102 .addMatrixEntry(token); 1103 this.expectingMatrixKey = false; 1104 this.expectingMatrixContent = true; 1105 // Update first line info and set up stack for entry. 
1106 if (!this.matrixStack.containsKey(token)) { 1107 this.matrixStack.put(token, new Stack()); 1108 if (this.matrixPrependNulls > 0) 1109 for (int i = 0; i < this.matrixPrependNulls; i++) 1110 ((CharactersBlockListener) this.getBlockListener()) 1111 .appendMatrixData(this.currentMatrixKey, null); 1112 } 1113 if (this.matrixFirstLineKey == null) 1114 this.matrixFirstLineKey = this.currentMatrixKey; 1115 } 1116 1117 else if (this.expectingMatrixContent) { 1118 final Stack stack = (Stack) this.matrixStack 1119 .get(this.currentMatrixKey); 1120 if ("(".equals(token)) { 1121 final List newList = new ArrayList(); 1122 if (!stack.isEmpty()) 1123 ((Collection) stack.peek()).add(newList); 1124 else 1125 ((CharactersBlockListener) this.getBlockListener()) 1126 .appendMatrixData(this.currentMatrixKey, newList); 1127 stack.push(newList); 1128 } else if ("{".equals(token)) { 1129 final Set newSet = new LinkedHashSet(); 1130 if (!stack.isEmpty()) 1131 ((Collection) stack.peek()).add(newSet); 1132 else 1133 ((CharactersBlockListener) this.getBlockListener()) 1134 .appendMatrixData(this.currentMatrixKey, newSet); 1135 stack.push(newSet); 1136 } else if (")".equals(token) && !stack.isEmpty() 1137 && (stack.peek() instanceof List)) { 1138 stack.pop(); 1139 if (stack.isEmpty() 1140 && this.currentMatrixKey 1141 .equals(this.matrixFirstLineKey)) 1142 this.matrixFirstLineLength++; 1143 } else if ("}".equals(token) && !stack.isEmpty() 1144 && (stack.peek() instanceof Set)) { 1145 stack.pop(); 1146 if (stack.isEmpty() 1147 && this.currentMatrixKey 1148 .equals(this.matrixFirstLineKey)) 1149 this.matrixFirstLineLength++; 1150 } else { 1151 final boolean reallyUseTokens = (this.tokenizedMatrix || "CONTINUOUS" 1152 .equals(this.specifiedDataType)) 1153 && !("DNA".equals(this.specifiedDataType) 1154 || "RNA".equals(this.specifiedDataType) || "NUCLEOTIDE" 1155 .equals(this.specifiedDataType)); 1156 if (reallyUseTokens) { 1157 if (!stack.isEmpty()) 1158 ((Collection) stack.peek()).add(token); 
1159 else { 1160 ((CharactersBlockListener) this.getBlockListener()) 1161 .appendMatrixData(this.currentMatrixKey, token); 1162 if (this.currentMatrixKey 1163 .equals(this.matrixFirstLineKey)) 1164 this.matrixFirstLineLength++; 1165 } 1166 } else { 1167 final String[] toks = token.split(""); 1168 for (int i = 0; i < toks.length; i++) { 1169 final String tok = toks[i]; 1170 if (!stack.isEmpty()) 1171 ((Collection) stack.peek()).add(tok); 1172 else { 1173 ((CharactersBlockListener) this.getBlockListener()) 1174 .appendMatrixData(this.currentMatrixKey, 1175 tok); 1176 if (this.currentMatrixKey 1177 .equals(this.matrixFirstLineKey)) 1178 this.matrixFirstLineLength++; 1179 } 1180 } 1181 } 1182 } 1183 } 1184 1185 else 1186 throw new ParseException("Found unexpected token " + token 1187 + " in CHARACTERS block"); 1188 } 1189}