001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.symbol; 024 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.InvalidObjectException; 028import java.io.ObjectStreamException; 029import java.io.Serializable; 030import java.util.AbstractList; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.HashMap; 035import java.util.HashSet; 036import java.util.Iterator; 037import java.util.List; 038import java.util.Map; 039import java.util.NoSuchElementException; 040import java.util.Set; 041import java.util.WeakHashMap; 042 043import javax.xml.parsers.ParserConfigurationException; 044import javax.xml.parsers.SAXParserFactory; 045 046import org.biojava.bio.Annotation; 047import org.biojava.bio.BioError; 048import org.biojava.bio.BioException; 049import org.biojava.bio.SmallAnnotation; 050import org.biojava.bio.seq.io.AlternateTokenization; 051import org.biojava.bio.seq.io.CharacterTokenization; 052import org.biojava.bio.seq.io.NameTokenization; 053import org.biojava.bio.seq.io.SeqIOListener; 054import org.biojava.bio.seq.io.StreamParser; 055import org.biojava.bio.seq.io.SymbolTokenization; 056import org.biojava.utils.ChangeListener; 057import org.biojava.utils.ChangeType; 058import org.biojava.utils.ChangeVetoException; 059import org.biojava.utils.ClassTools; 060import 
org.biojava.utils.Unchangeable;
import org.biojava.utils.cache.WeakValueHashMap;
import org.biojava.utils.lsid.Identifiable;
import org.biojava.utils.lsid.LifeScienceIdentifier;
import org.biojava.utils.lsid.LifeScienceIdentifierParseException;
import org.biojava.utils.stax.DelegationManager;
import org.biojava.utils.stax.SAX2StAXAdaptor;
import org.biojava.utils.stax.StAXContentHandler;
import org.biojava.utils.stax.StAXContentHandlerBase;
import org.biojava.utils.stax.StringElementHandlerBase;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


/**
 * Utility methods for working with Alphabets. Also acts as a registry for
 * well-known alphabets.
 *
 * <p>
 * The alphabet interfaces themselves don't give you a lot of help in actually
 * getting an alphabet instance. This is where the AlphabetManager comes in
 * handy. It helps out in serialization, generating derived alphabets and
 * building CrossProductAlphabet instances. It also contains limited support for
 * parsing complex alphabet names back into the alphabets.
 * </p>
 *
 * @author Matthew Pocock
 * @author Thomas Down
 * @author Mark Schreiber
 * @author George Waldon (alternate tokenization)
 */

public final class AlphabetManager {
  // Registry of alphabets keyed by the String name they were registered under.
  static private Map nameToAlphabet;
  //static private Map nameToSymbol;
  // Symbols keyed by their LifeScienceIdentifier.
  static private Map lsidToSymbol;
  // Cache of cross-product alphabets keyed by the List of component alphabets;
  // created lazily, so it may be null.
  static private Map crossProductAlphabets;
  // Seeded by the static initializer with (empty Set -> gapSymbol); any other
  // use is not visible in this part of the file.
  static private Map ambiguitySymbols;
  // The system-wide gap symbol handed out by getGapSymbol().
  static private GapSymbol gapSymbol;
  // Gap symbols keyed by the SizeQueen built from a list of alphabets.
  static private Map gapBySize;
  // Weakly-keyed map; NOTE(review): its use is not visible in this part of the file.
  static private Map alphabetToIndex = new WeakHashMap();
  // Cache from a List of Symbols to the Symbol created for that list.
  static private Map symListToSymbol;

  /**
   * <p>
   * Initialize the static AlphabetManager resources.
108 * </p> 109 * 110 * <p> 111 * This parses the resource 112 * <code>org/biojava/bio/seq/tools/AlphabetManager.xml</code> 113 * and builds a basic set of alphabets. 114 * </p> 115 */ 116 static { 117 nameToAlphabet = new HashMap(); 118 //nameToSymbol = new HashMap(); 119 lsidToSymbol = new HashMap(); 120 ambiguitySymbols = new HashMap(); 121 122 gapSymbol = new GapSymbol(); 123 gapBySize = new HashMap(); 124 gapBySize.put(new SizeQueen(new ArrayList()), gapSymbol); 125 126 nameToAlphabet.put("INTEGER", IntegerAlphabet.getInstance()); 127 nameToAlphabet.put("DOUBLE", DoubleAlphabet.getInstance()); 128 129 symListToSymbol = new WeakValueHashMap(); 130 131 try { 132 SizeQueen sq = new SizeQueen(Arrays.asList( 133 new Alphabet[] { DoubleAlphabet.getInstance() })); 134 gapBySize.put(sq, 135 new WellKnownGapSymbol( 136 Arrays.asList(new Symbol[] { gapSymbol}), sq)); 137 } catch (IllegalSymbolException ise) { 138 throw new BioError( 139 140 "Assertion Failure: Should be able to make gap basis", ise 141 ); 142 } 143 144 ambiguitySymbols.put(new HashSet(), gapSymbol); 145 try { 146 InputStream alphabetStream = ClassTools.getClassLoader(AlphabetManager.class).getResourceAsStream( 147 "org/biojava/bio/symbol/AlphabetManager.xml" 148 ); 149 if (alphabetStream == null) { 150 throw new BioError("Couldn't locate AlphabetManager.xml. This probably means that your biojava.jar file is corrupt or incorrectly built."); 151 } 152 InputSource is = new InputSource(alphabetStream); 153 loadAlphabets(is); 154 } catch (Exception t) { 155 throw new BioError( "Unable to initialize AlphabetManager", t); 156 } 157 } 158 159 /** 160 * Singleton instance. 161 */ 162 static private AlphabetManager am; 163 164 /** 165 * Retrieve the singleton instance. 
166 * 167 * @return the AlphabetManager instance 168 * @deprecated all AlphabetManager methods have become static 169 */ 170 static public AlphabetManager instance() { 171 if(am == null) 172 am = new AlphabetManager(); 173 return am; 174 } 175 176 177 /** 178 * Return the ambiguity symbol which matches all symbols in 179 * a given alphabet. 180 * @since 1.2 181 * @param alpha The alphabet 182 * @return the ambiguity symbol 183 */ 184 185 public static Symbol getAllAmbiguitySymbol(FiniteAlphabet alpha) { 186 Set allSymbols = new HashSet(); 187 for (Iterator i = alpha.iterator(); i.hasNext(); ) { 188 allSymbols.add(i.next()); 189 } 190 try { 191 return alpha.getAmbiguity(allSymbols); 192 } catch (IllegalSymbolException ex) { 193 throw new BioError( "Assertion failure: coudn't recover all-ambiguity symbol", ex); 194 } 195 } 196 197 /** 198 * Return a set containing all possible symbols which can be 199 * considered members of a given alphabet, including ambiguous 200 * symbols. Warning, this method can return large sets! 
201 * @since 1.2 202 * @param alpha The alphabet 203 * @return The set of symbols that are members of <code>alpha</code> 204 */ 205 206 public static Set getAllSymbols(FiniteAlphabet alpha) { 207 Set allSymbols = new HashSet(); 208 List orderedAlpha = new ArrayList(alpha.size()); 209 for (Iterator i = alpha.iterator(); i.hasNext(); ) { 210 orderedAlpha.add(i.next()); 211 } 212 213 int atomicSyms = alpha.size(); 214 int totalSyms = 1 << atomicSyms; 215 216 for (int cnt = 0; cnt < totalSyms; ++cnt) { 217 Set matchSet = new HashSet(); 218 for (int atom = 0; atom < atomicSyms; ++atom) { 219 if ((cnt & (1 << atom)) != 0) { 220 matchSet.add(orderedAlpha.get(atom)); 221 } 222 } 223 224 try { 225 allSymbols.add(alpha.getAmbiguity(matchSet)); 226 } catch (IllegalSymbolException ex) { 227 throw new BioError( "Assertion failed: couldn't get ambiguity symbol", ex); 228 } 229 } 230 231 return allSymbols; 232 } 233 234 235 236 /** 237 * Retrieve the alphabet for a specific name. 238 * 239 * @param name the name of the alphabet 240 * @return the alphabet object 241 * @throws NoSuchElementException if there is no alphabet by that name 242 */ 243 static public Alphabet alphabetForName(String name) 244 throws NoSuchElementException{ 245 Alphabet alpha = (Alphabet) nameToAlphabet.get(name); 246 if(alpha == null) { 247 if(name.startsWith("(") && name.endsWith(")")) { 248 alpha = generateCrossProductAlphaFromName(name); 249 } else { 250 throw new NoSuchElementException( 251 "No alphabet for name " + name + " could be found" 252 ); 253 } 254 } 255 return alpha; 256 } 257 /** 258 * Retrieve the symbol represented a String object 259 * @deprecated use symbolForLifeScienceID() instead 260 * @param name of the string whose symbol you want to get 261 * @throws NoSuchElementException if the string name is invalid. 
262 * @return The Symbol 263 */ 264 static public Symbol symbolForName(String name) 265 throws NoSuchElementException { 266 String ls = "urn:lsid:biojava.org:symbol:"+name; 267 LifeScienceIdentifier lsid = null; 268 try { 269 lsid = LifeScienceIdentifier.valueOf(ls); 270 } catch (LifeScienceIdentifierParseException ex) { 271 throw new BioError("Cannot construct LSID for "+name, ex); 272 } 273 Symbol s = (Symbol) lsidToSymbol.get(lsid); 274 if(s == null) { 275 throw new NoSuchElementException("Could not find symbol under the name " + lsid); 276 } 277 return s; 278 } 279 280 /** 281 * Retreives the Symbol for the LSID 282 * @param lsid the URN for the Symbol 283 * @return a reference to the Symbol 284 */ 285 static public Symbol symbolForLifeScienceID(LifeScienceIdentifier lsid){ 286 return (Symbol)lsidToSymbol.get(lsid); 287 } 288 289 /** 290 * Register an alphabet by name. 291 * 292 * @param name the name by which it can be retrieved 293 * @param alphabet the Alphabet to store 294 */ 295 static public void registerAlphabet(String name, Alphabet alphabet) { 296 nameToAlphabet.put(name, alphabet); 297 if(alphabet instanceof AbstractAlphabet){ //this might be needed for serialization 298 ((AbstractAlphabet)alphabet).setRegistered(true); 299 } 300 } 301 302 /** 303 * Register and Alphabet by more than one name. This allows aliasing 304 * of an alphabet with two or more names. It is equivalent to calling 305 * <code>registerAlphabet(String name, Alphabet alphabet)</code> several 306 * times. 307 * 308 * @since 1.4 309 * @param names the names by which it can be retrieved 310 * @param alphabet the Alphabet to store 311 */ 312 static public void registerAlphabet(String[] names, Alphabet alphabet){ 313 for(int i = 0; i < names.length; i++){ 314 registerAlphabet(names[i], alphabet); 315 } 316 } 317 318 /** 319 * A set of names under which Alphabets have been registered. 
320 * @return a <code>Set</code> of <code>Strings</code> 321 */ 322 static public Set registrations(){ 323 return Collections.unmodifiableSet(nameToAlphabet.keySet()); 324 } 325 326 /** 327 * Has an Alphabet been registered by that name 328 * @param name the name of the alphabet 329 * @return true if it has or false otherwise 330 */ 331 static public boolean registered(String name){ 332 return nameToAlphabet.containsKey(name); 333 } 334 335 /** 336 * Get an iterator over all alphabets known. 337 * 338 * @return an Iterator over Alphabet objects 339 */ 340 static public Iterator alphabets() { 341 return Collections.unmodifiableCollection(nameToAlphabet.values()).iterator(); 342 } 343 344 /** 345 * <p> 346 * Get the special `gap' Symbol. 347 * </p> 348 * 349 * <p> 350 * The gap symbol is a Symbol that has an empty alphabet of matches. As such 351 *, ever alphabet contains gap, as there is no symbol that matches gap, so 352 * there is no case where an alphabet doesn't contain a symbol that matches 353 * gap. 354 * </p> 355 * 356 * <p> 357 * Gap can be thought of as an empty sub-space within the space of all 358 * possible symbols. If you are working in a cross-product alphabet, you 359 * should chose whether to use gap to represent 'no symbol', or a basis symbol 360 * of the appropriate size built entirely of gaps to represent 'no symbol in 361 * each of the slots'. Perhaps this could be explained better. 362 * </p> 363 * 364 * @return the system-wide symbol that represents a gap 365 */ 366 static public Symbol getGapSymbol() { 367 return gapSymbol; 368 } 369 370 /** 371 * <p> 372 * Get the gap symbol appropriate to this list of alphabets. 373 * </p> 374 * 375 * <p> 376 * The gap symbol with have the same shape a the alphabet list. It will be as 377 * long as the list, and if any of the alphabets in the list have a dimension 378 * greater than 1, it will also insert the appropriate gap there. 
379 * </p> 380 * 381 * @param alphas List of alphabets 382 * @return the appropriate gap symbol for the alphabet list 383 */ 384 static public Symbol getGapSymbol(List alphas) { 385 SizeQueen sq = new SizeQueen(alphas); 386 Symbol s = (Symbol) gapBySize.get(sq); 387 388 if(s == null) { 389 if(alphas.size() == 0) { // should never be needed 390 s = gapSymbol; 391 } else if(alphas.size() == 1) { // should never happen 392 Alphabet a = (Alphabet) alphas.get(0); 393 s = getGapSymbol(a.getAlphabets()); 394 } else { 395 List symList = new ArrayList(alphas.size()); 396 for(Iterator i = alphas.iterator(); i.hasNext(); ) { 397 Alphabet a = (Alphabet) i.next(); 398 symList.add(getGapSymbol(a.getAlphabets())); 399 } 400 try { 401 s = new WellKnownGapSymbol(symList, sq); 402 } catch (IllegalSymbolException ise) { 403 throw new BioError( 404 "Assertion Failure: Should be able to make gap basis", ise 405 ); 406 } 407 } 408 gapBySize.put(sq, s); 409 } 410 411 return s; 412 } 413 414 415 416 /** 417 * <p> 418 * Generate a new AtomicSymbol instance with a name and Annotation. 419 * </p> 420 * 421 * <p> 422 * Use this method if you wish to create an AtomicSymbol instance. Initially it 423 * will not be a member of any alphabet. 424 * </p> 425 * 426 * @param name the String returned by getName() 427 * @param annotation the Annotation returned by getAnnotation() 428 * @return a new AtomicSymbol instance 429 */ 430 static public AtomicSymbol createSymbol( 431 String name, Annotation annotation 432 ) { 433 AtomicSymbol as = new FundamentalAtomicSymbol(name, annotation); 434 return as; 435 } 436 437 /** 438 * <p> 439 * Generate a new AtomicSymbol instance with a name and an Empty Annotation. 440 * </p> 441 * 442 * <p> 443 * Use this method if you wish to create an AtomicSymbol instance. Initially it 444 * will not be a member of any alphabet. 
445 * </p> 446 * 447 * @param name the String returned by getName() 448 * @return a new AtomicSymbol instance 449 */ 450 static public AtomicSymbol createSymbol( 451 String name 452 ) { 453 AtomicSymbol as = new FundamentalAtomicSymbol(name, Annotation.EMPTY_ANNOTATION); 454 return as; 455 } 456 457 /** 458 * <p> 459 * Generate a new AtomicSymbol instance with a token, name and Annotation. 460 * </p> 461 * 462 * <p> 463 * Use this method if you wish to create an AtomicSymbol instance. Initially it 464 * will not be a member of any alphabet. 465 * </p> 466 * 467 * @param token the Char token returned by getToken() (ignpred as of BioJava 1.2) 468 * @param name the String returned by getName() 469 * @param annotation the Annotation returned by getAnnotation() 470 * @return a new AtomicSymbol instance 471 * @deprecated Use the two-arg version of this method instead. 472 */ 473 static public AtomicSymbol createSymbol( 474 char token, String name, Annotation annotation 475 ) { 476 AtomicSymbol as = new FundamentalAtomicSymbol(name, annotation); 477 return as; 478 } 479 480 /** 481 * <p> 482 * Generates a new Symbol instance that represents the tuple of Symbols in 483 * symList. 484 * </p> 485 * 486 * <p> 487 * This method is most useful for writing Alphabet implementations. It should 488 * not be invoked by casual users. Use alphabet.getSymbol(List) instead. 
489 * </p> 490 * @return a Symbol that encapsulates that List 491 * @deprecated use the new version, without the token argument 492 * @param annotation The annotation bundle for the symbol 493 * @param token the Symbol's token [ignored since 1.2] 494 * @param symList a list of Symbol objects 495 * @param alpha the Alphabet that this Symbol will reside in 496 * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made 497 */ 498 static public Symbol createSymbol( 499 char token, Annotation annotation, 500 List symList, Alphabet alpha 501 ) throws IllegalSymbolException { 502 return createSymbol(annotation, symList, alpha); 503 } 504 505 static private Symbol readFromCache(List symList) 506 { 507 //System.out.println("Reading symbol: " + symList + " -> " + symListToSymbol.get(symList)); 508 return (Symbol) symListToSymbol.get(symList); 509 } 510 511 static private void writeToCache(List symList, Symbol sym) 512 { 513 //System.out.println("Writing symbol: " + symList + " -> " + sym); 514 symListToSymbol.put(new ArrayList(symList), sym); 515 } 516 517 /** 518 * <p> 519 * Generates a new Symbol instance that represents the tuple of Symbols in 520 * symList. This will attempt to return the same symbol for the same list. 521 * </p> 522 * 523 * <p> 524 * This method is most useful for writing Alphabet implementations. It should 525 * not be invoked by casual users. Use alphabet.getSymbol(List) instead. 
526 * </p> 527 * @return a Symbol that encapsulates that List 528 * @param annotation The annotation bundle for the Symbol 529 * @param symList a list of Symbol objects 530 * @param alpha the Alphabet that this Symbol will reside in 531 * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made 532 */ 533 static public Symbol createSymbol( 534 Annotation annotation, 535 List symList, Alphabet alpha) 536 throws IllegalSymbolException 537 { 538 Symbol cs = readFromCache(symList); 539 if(cs != null) { 540 return cs; 541 } 542 543 Iterator i = symList.iterator(); 544 int basis = 0; 545 int atomC = 0; 546 int gaps = 0; 547 while(i.hasNext()) { 548 Symbol s = (Symbol) i.next(); 549 if(s instanceof BasisSymbol) { 550 basis++; 551 if(s instanceof AtomicSymbol) { 552 atomC++; 553 } 554 } else { 555 Alphabet matches = s.getMatches(); 556 if(matches instanceof FiniteAlphabet) { 557 if(((FiniteAlphabet) matches).size() == 0) { 558 gaps++; 559 } 560 } 561 } 562 } 563 564 try { 565 if(atomC == symList.size()) { 566 Symbol sym = new SimpleAtomicSymbol(annotation, symList); 567 writeToCache(symList, sym); 568 return sym; 569 } else if((gaps + basis) == symList.size()) { 570 Symbol sym = new SimpleBasisSymbol( 571 annotation, 572 symList, 573 new SimpleAlphabet( 574 expandMatches(alpha, symList, new ArrayList()))); 575 writeToCache(symList, sym); 576 return sym; 577 } else { 578 Symbol sym = new SimpleSymbol( 579 annotation, 580 new SimpleAlphabet( 581 expandBasis(alpha, symList, new ArrayList()))); 582 writeToCache(symList, sym); 583 return sym; 584 } 585 } catch (IllegalSymbolException ise) { 586 throw new IllegalSymbolException( 587 ise, 588 "Could not create a new symbol with: " + 589 annotation + "\t" + 590 symList + "\t" + 591 alpha); 592 } 593 } 594 595 /** 596 * Expands a list of BasisSymbols into the set of AtomicSymbol instances 597 * it matches. 
598 */ 599 private static Set expandBasis(Alphabet alpha, List symList, List built) { 600 int indx = built.size(); 601 if(indx < symList.size()) { 602 Symbol s = (Symbol) symList.get(indx); 603 if(s instanceof AtomicSymbol) { 604 built.add(s); 605 return expandBasis(alpha, symList, built); 606 } else { 607 Set res = new HashSet(); 608 Iterator i = ((FiniteAlphabet) s.getMatches()).iterator(); 609 while(i.hasNext()) { 610 AtomicSymbol as = (AtomicSymbol) i.next(); 611 List built2 = new ArrayList(built); 612 built2.add(as); 613 res.addAll(expandBasis(alpha, symList, built2)); 614 } 615 return res; 616 } 617 } else { 618 try { 619 return Collections.singleton(alpha.getSymbol(built)); 620 } catch (IllegalSymbolException ise) { 621 throw new BioError( 622 "Assertion Failure: Should just have legal AtomicSymbol instances.", ise 623 ); 624 } 625 } 626 } 627 628 /** 629 * <p> 630 * Generates a new Symbol instance that represents the tuple of Symbols in 631 * symList. 632 * </p> 633 * 634 * <p> 635 * This method is most useful for writing Alphabet implementations. It should 636 * not be invoked by users. Use alphabet.getSymbol(Set) instead. 637 * </p> 638 * @return a Symbol that encapsulates that List 639 * @deprecated use the three-arg version of this method instead. 640 * @param token the Symbol's token [ignored since 1.2] 641 * @param annotation the Symbol's Annotation 642 * @param symSet a Set of Symbol objects 643 * @param alpha the Alphabet that this Symbol will reside in 644 * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made 645 */ 646 static public Symbol createSymbol( 647 char token, Annotation annotation, 648 Set symSet, Alphabet alpha 649 ) throws IllegalSymbolException { 650 return createSymbol(annotation, symSet, alpha); 651 } 652 653 /** 654 * <p> 655 * Generates a new Symbol instance that represents the tuple of Symbols in 656 * symList. 
657 * </p> 658 * 659 * <p> 660 * This method is most useful for writing Alphabet implementations. It should 661 * not be invoked by users. Use alphabet.getSymbol(Set) instead. 662 * </p> 663 * @return a Symbol that encapsulates that List 664 * @param annotation the Symbol's Annotation 665 * @param symSet a Set of Symbol objects 666 * @param alpha the Alphabet that this Symbol will reside in 667 * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made 668 */ 669 static public Symbol createSymbol( 670 Annotation annotation, 671 Set symSet, Alphabet alpha 672 ) throws IllegalSymbolException { 673 if(symSet.size() == 0) { 674 return getGapSymbol(); 675 } 676 Set asSet = new HashSet(); 677 int len = -1; 678 for( 679 Iterator i = symSet.iterator(); 680 i.hasNext(); 681 ) { 682 Symbol s = (Symbol) i.next(); 683 if(s instanceof AtomicSymbol) { 684 AtomicSymbol as = (AtomicSymbol) s; 685 int l = as.getSymbols().size(); 686 if(len == -1) { 687 len = l; 688 } else if(len != l) { 689 throw new IllegalSymbolException( 690 "Can't build ambiguity symbol as the symbols have inconsistent " + 691 "length" 692 ); 693 } 694 asSet.add(as); 695 } else { 696 for(Iterator j = ((FiniteAlphabet) s.getMatches()).iterator(); 697 j.hasNext(); 698 ) { 699 AtomicSymbol as = ( AtomicSymbol) j.next(); 700 int l = as.getSymbols().size(); 701 if(len == -1) { 702 len = l; 703 } else if(len != l) { 704 throw new IllegalSymbolException( 705 "Can't build ambiguity symbol as the symbols have inconsistent " + 706 "length" 707 ); 708 } 709 asSet.add(as); 710 } 711 } 712 } 713 if(asSet.size() == 0) { 714 return getGapSymbol(); 715 } else if(asSet.size() == 1) { 716 return (Symbol) asSet.iterator().next(); 717 } else { 718 if(len == 1) { 719 return new SimpleBasisSymbol( 720 annotation, new SimpleAlphabet(asSet) 721 ); 722 } else { 723 List fs = factorize(alpha, asSet); 724 if(fs == null) { 725 return new SimpleSymbol( 726 annotation, 727 new SimpleAlphabet(asSet) 728 ); 729 } 
else { 730 return new SimpleBasisSymbol( 731 annotation, 732 fs, new SimpleAlphabet( 733 expandBasis(alpha, fs, new ArrayList()) 734 ) 735 ); 736 } 737 } 738 } 739 } 740 741 /** 742 * Generates a new CrossProductAlphabet from the give name. 743 * 744 * @param name the name to parse 745 * @return the associated Alphabet 746 */ 747 static public Alphabet generateCrossProductAlphaFromName( 748 String name 749 ) { 750 if(!name.startsWith("(") || !name.endsWith(")")) { 751 throw new BioError( 752 "Can't parse " + name + 753 " into a cross-product alphabet as it is not bracketed" 754 ); 755 } 756 757 name = name.substring(1, name.length()-1).trim(); 758 List aList = new ArrayList(); // the alphabets 759 int i = 0; 760 while(i < name.length()) { 761 if(name.charAt(i) == '(') { 762 int depth = 1; 763 int j = i+1; 764 while(j < name.length() && depth > 0) { 765 char c = name.charAt(j); 766 if(c == '(') { 767 depth++; 768 } else if(c == ')') { 769 depth--; 770 } 771 j++; 772 } 773 if(depth == 0) { 774 aList.add(alphabetForName(name.substring(i, j))); 775 i = j; 776 } else { 777 throw new BioError( 778 "Error parsing alphabet name: could not find matching bracket\n" + 779 name.substring(i) 780 ); 781 } 782 } else { 783 int j = name.indexOf(" x ", i); 784 if(j < 0) { 785 aList.add(alphabetForName(name.substring(i).trim())); 786 i = name.length(); 787 } else { 788 if(i != j){ 789 aList.add(alphabetForName(name.substring(i, j).trim())); 790 } 791 i = j + " x ".length(); 792 } 793 } 794 } 795 796 return getCrossProductAlphabet(aList); 797 } 798 799 /** 800 * <p> 801 * Retrieve a CrossProductAlphabet instance over the alphabets in aList. 802 * </p> 803 * 804 * <p> 805 * If all of the alphabets in aList implements FiniteAlphabet then the 806 * method will return a FiniteAlphabet. Otherwise, it returns a non-finite 807 * alphabet. 808 * </p> 809 * 810 * <p> 811 * If you call this method twice with a list containing the same alphabets, 812 * it will return the same alphabet. 
This promotes the re-use of alphabets 813 * and helps to maintain the 'flyweight' principal for finite alphabet 814 * symbols. 815 * </p> 816 * 817 * <p> 818 * The resulting alphabet cpa will be retrievable via 819 * AlphabetManager.alphabetForName(cpa.getName()) 820 * </p> 821 * @param aList a list of Alphabet objects 822 * @return a CrossProductAlphabet that is over the alphabets in aList 823 */ 824 static public Alphabet getCrossProductAlphabet(List aList) { 825 return getCrossProductAlphabet(aList, (Alphabet) null); 826 } 827 828 829 /** 830 * Attempts to create a cross product alphabet and register it under a name. 831 * @param aList A list of alphabets 832 * @param name The name which the new alphabet will be registered under. 833 * @throws org.biojava.bio.symbol.IllegalAlphabetException If the Alphabet cannot be made or a different 834 * alphabet is already registed under this name. 835 * @return The CrossProductAlphabet 836 */ 837 static public Alphabet getCrossProductAlphabet(List aList, String name) 838 throws IllegalAlphabetException { 839 Alphabet currentAlpha = (Alphabet) nameToAlphabet.get(name); 840 if(currentAlpha != null) { 841 if(currentAlpha.getAlphabets().equals(aList)) { 842 return currentAlpha; 843 } else { 844 throw new IllegalAlphabetException(name + " already registered"); 845 } 846 } else { 847 Alphabet alpha = getCrossProductAlphabet(aList); 848 registerAlphabet(name, alpha); 849 return alpha; 850 } 851 } 852 853 /** 854 * <p> 855 * Retrieve a CrossProductAlphabet instance over the alphabets in aList. 856 * </p> 857 * 858 * <p> 859 * This method is most usefull for implementors of cross-product alphabets, 860 * allowing them to safely build the matches alphabets for ambiguity symbols. 861 * </p> 862 * 863 * <p> 864 * If all of the alphabets in aList implements FiniteAlphabet then the 865 * method will return a FiniteAlphabet. Otherwise, it returns a non-finite 866 * alphabet. 
867 * </p> 868 * 869 * <p> 870 * If you call this method twice with a list containing the same alphabets, 871 * it will return the same alphabet. This promotes the re-use of alphabets 872 * and helps to maintain the 'flyweight' principal for finite alphabet 873 * symbols. 874 * </p> 875 * 876 * <p> 877 * The resulting alphabet cpa will be retrievable via 878 * AlphabetManager.alphabetForName(cpa.getName()) 879 * </p> 880 * 881 * @param aList a list of Alphabet objects 882 * @param parent a parent alphabet 883 * @return a CrossProductAlphabet that is over the alphabets in aList 884 */ 885 static public Alphabet getCrossProductAlphabet( 886 List aList, Alphabet parent 887 ) { 888 if(aList.size() == 0) { 889 return Alphabet.EMPTY_ALPHABET; 890 } 891 892 // This trap means that the `product' operator can be 893 // safely applied to a single alphabet. 894 895 if (aList.size() == 1) 896 return (Alphabet) aList.get(0); 897 898 if(crossProductAlphabets == null) { 899 crossProductAlphabets = new HashMap(); 900 } 901 902 Alphabet cpa = (Alphabet) crossProductAlphabets.get(aList); 903 904 int size = 1; 905 if(cpa == null) { 906 for(Iterator i = aList.iterator(); i.hasNext(); ) { 907 Alphabet aa = (Alphabet) i.next(); 908 if(! (aa instanceof FiniteAlphabet) ) { 909 cpa = new InfiniteCrossProductAlphabet(aList); 910 break; 911 } 912 if(size <= 1000) { 913 size *= ((FiniteAlphabet) aa).size(); 914 } 915 } 916 if(cpa == null) { 917 try { 918 if(size > 0 && size < 1000) { 919 cpa = new SimpleCrossProductAlphabet(aList, parent); 920 } else { 921 cpa = new SparseCrossProductAlphabet(aList); 922 } 923 } catch (IllegalAlphabetException iae) { 924 throw new BioError( 925 "Could not create SimpleCrossProductAlphabet for " + aList + 926 " even though we should be able to. No idea what is wrong." 
927 ); 928 } 929 } 930 crossProductAlphabets.put(new ArrayList(aList), cpa); 931 registerAlphabet(cpa.getName(), cpa); 932 } 933 934 return cpa; 935 } 936 937 private static Set expandMatches(Alphabet parent, List symList, List built) { 938 int indx = built.size(); 939 if(indx < symList.size()) { 940 Symbol bs = (Symbol) symList.get(indx); 941 if(bs instanceof AtomicSymbol) { 942 built.add(bs); 943 return expandMatches(parent, symList, built); 944 } else { 945 Set syms = new HashSet(); 946 Iterator i = ((FiniteAlphabet) bs.getMatches()).iterator(); 947 while(i.hasNext()) { 948 List built2 = new ArrayList(built); 949 built2.add((AtomicSymbol) i.next()); 950 syms.addAll(expandMatches(parent, symList, built2)); 951 } 952 return syms; 953 } 954 } else { 955 try { 956 Symbol s = parent.getSymbol(built); 957 if(s instanceof AtomicSymbol) { 958 return Collections.singleton((AtomicSymbol) s); 959 } else { 960 Set syms = new HashSet(); 961 for(Iterator i = ((FiniteAlphabet) s.getMatches()).iterator(); i.hasNext(); ) { 962 syms.add((AtomicSymbol) i.next()); 963 } 964 return syms; 965 } 966 } catch (IllegalSymbolException ise) { 967 throw new BioError("Assertion Failure: Couldn't create symbol.", ise); 968 } 969 } 970 } 971 972 /** 973 * <p> 974 * Return a list of BasisSymbol instances that uniquely sum up all 975 * AtomicSymbol 976 * instances in symSet. If the symbol can't be represented by a single list of 977 * BasisSymbol instances, return null. 978 * </p> 979 * 980 * <p> 981 * This method is most useful for implementers of Alphabet and Symbol. It 982 * probably should not be invoked by users. 983 * </p> 984 * @return a List of BasisSymbols 985 * @param symSet the Set of AtomicSymbol instances 986 * @param alpha the Alphabet instance that the Symbols are from 987 * @throws org.biojava.bio.symbol.IllegalSymbolException In practice it should not. 
If it does it probably 988 * indicates a subtle bug somewhere in AlphabetManager 989 */ 990 public static List factorize(Alphabet alpha, Set symSet) 991 throws IllegalSymbolException { 992 List alphas = alpha.getAlphabets(); 993 List facts = new ArrayList(); 994 int size = symSet.size(); 995 Set syms = new HashSet(); 996 for(int col = 0; col < alphas.size(); col++) { 997 Alphabet a = (Alphabet) alphas.get(col); 998 for(Iterator i = symSet.iterator(); i.hasNext(); ) { 999 syms.add( 1000 (AtomicSymbol) ((AtomicSymbol) 1001 i.next()).getSymbols().get(col) 1002 ); 1003 } 1004 int s = syms.size(); 1005 if( (size % s) != 0 ) { 1006 return null; 1007 } 1008 size /= s; 1009 facts.add(a.getAmbiguity(syms)); 1010 syms.clear(); 1011 } 1012 if(size != 1) { 1013 return null; 1014 } 1015 return facts; 1016 } 1017 1018 1019 1020 1021 /** 1022 * Load additional Alphabets, defined in XML format, into the AlphabetManager's registry. 1023 * These can the be retrieved by calling <code>alphabetForName</code>. 1024 * 1025 * @param is an <code>InputSource</code> encapsulating the document to be parsed 1026 * @throws IOException if there is an error accessing the stream 1027 * @throws SAXException if there is an error while parsing the document 1028 * @throws BioException if a problem occurs when creating the new Alphabets. 
1029 * @since 1.3 1030 */ 1031 1032 public static void loadAlphabets(InputSource is) 1033 throws SAXException, IOException, BioException 1034 { 1035 try { 1036 SAXParserFactory spf = SAXParserFactory.newInstance(); 1037 spf.setNamespaceAware(true); 1038 XMLReader parser = spf.newSAXParser().getXMLReader(); 1039 parser.setContentHandler(new SAX2StAXAdaptor(new AlphabetManagerHandler())); 1040 parser.parse(is); 1041 } catch (ParserConfigurationException ex) { 1042 throw new BioException( "Unable to create XML parser", ex); 1043 } 1044 } 1045 1046 /** 1047 * StAX handler for the alphabetManager element 1048 */ 1049 1050 private static class AlphabetManagerHandler extends StAXContentHandlerBase { 1051 public void startElement(String nsURI, 1052 String localName, 1053 String qName, 1054 Attributes attrs, 1055 DelegationManager dm) 1056 throws SAXException 1057 { 1058 if (localName.equals("alphabetManager")) { 1059 // ignore 1060 } else if (localName.equals("symbol")) { 1061 String name = attrs.getValue("name"); 1062 dm.delegate(new SymbolHandler(name)); 1063 } else if (localName.equals("alphabet")) { 1064 String name = attrs.getValue("name"); 1065 String parent = attrs.getValue("parent"); 1066 FiniteAlphabet parentAlpha = null; 1067 if (parent != null && parent.length() > 0) { 1068 parentAlpha = (FiniteAlphabet) nameToAlphabet.get(parent); 1069 } 1070 dm.delegate(new AlphabetHandler(name, parentAlpha)); 1071 } else { 1072 throw new SAXException( 1073 "Unknown element in alphabetManager: " + 1074 localName); 1075 } 1076 } 1077 1078 public void endElement(String nsURI, 1079 String localName, 1080 String qName, 1081 StAXContentHandler delegate) 1082 throws SAXException 1083 { 1084 if (delegate instanceof SymbolHandler) { 1085 SymbolHandler sh = (SymbolHandler) delegate; 1086 //String name = sh.getName(); 1087 LifeScienceIdentifier lsid = sh.getLSID(); 1088 Symbol symbol = sh.getSymbol(); 1089 if (lsidToSymbol.containsKey(lsid)) { 1090 throw new SAXException( 1091 "There is 
already a top-level symbol named " 1092 + lsid); 1093 } 1094 lsidToSymbol.put(lsid, symbol); 1095 } else if (delegate instanceof AlphabetHandler) { 1096 AlphabetHandler ah = (AlphabetHandler) delegate; 1097 String name = ah.getName(); 1098 FiniteAlphabet alpha = ah.getAlphabet(); 1099 registerAlphabet(name, alpha); 1100 } 1101 } 1102 1103 private class SymbolHandler extends StAXContentHandlerBase { 1104 private String name; 1105 private LifeScienceIdentifier lsid; 1106 private Symbol symbol; 1107 private Annotation annotation = new SmallAnnotation(); 1108 1109 public SymbolHandler(String id) { 1110 try { 1111 lsid = LifeScienceIdentifier.valueOf(id); 1112 name = lsid.getObjectId(); 1113 } catch (LifeScienceIdentifierParseException ex) { 1114 throw new BioError("Malformed LSID - "+name, ex); 1115 } 1116 } 1117 1118 public void startElement(String nsURI, 1119 String localName, 1120 String qName, 1121 Attributes attrs, 1122 DelegationManager dm) 1123 throws SAXException 1124 { 1125 if (localName.equals("symbol")) { 1126 // ignore 1127 } else if (localName.equals("description")) { 1128 dm.delegate(new StringElementHandlerBase() { 1129 protected void setStringValue(String s) { 1130 try { 1131 annotation.setProperty("description", s); 1132 } catch (ChangeVetoException ex) { 1133 throw new BioError( "Assertion failure: veto while modifying new Annotation", ex); 1134 } 1135 } 1136 } ); 1137 } else { 1138 throw new SAXException("Unknown element in symbol: " + localName); 1139 } 1140 } 1141 1142 public void endTree() { 1143 symbol = new WellKnownAtomicSymbol( 1144 new FundamentalAtomicSymbol( 1145 name, 1146 annotation 1147 ), 1148 lsid 1149 ); 1150 } 1151 1152 Symbol getSymbol() { 1153 return symbol; 1154 } 1155 1156 String getName() { 1157 return name; 1158 } 1159 1160 LifeScienceIdentifier getLSID(){ 1161 return lsid; 1162 } 1163 } 1164 1165 private class AlphabetHandler extends StAXContentHandlerBase { 1166 private String name; 1167 //private Map localSymbols; 1168 
private WellKnownAlphabet alpha; 1169 private ImmutableWellKnownAlphabetWrapper alphaWrapper; 1170 1171 String getName() { 1172 return name; 1173 } 1174 1175 FiniteAlphabet getAlphabet() { 1176 return alphaWrapper; 1177 } 1178 1179 public void endTree() { 1180 alpha.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN); 1181 } 1182 1183 public AlphabetHandler(String name, FiniteAlphabet parent) { 1184 this.name = name; 1185 //localSymbols = new OverlayMap(nameToSymbol); 1186 alpha = new WellKnownAlphabet(); 1187 alpha.setName(name); 1188 alphaWrapper = new ImmutableWellKnownAlphabetWrapper(alpha); 1189 if (parent != null) { 1190 for (Iterator i = parent.iterator(); i.hasNext(); ) { 1191 WellKnownAtomicSymbol sym = 1192 (WellKnownAtomicSymbol) i.next(); 1193 try { 1194 alpha.addSymbol(sym); 1195 } catch (Exception ex) { 1196 throw new BioError( 1197 "Couldn't initialize alphabet from parent", ex); 1198 } 1199 lsidToSymbol.put(sym.getIdentifier(), sym); 1200 } 1201 } 1202 } 1203 1204 public void startElement(String nsURI, 1205 String localName, 1206 String qName, 1207 Attributes attrs, 1208 DelegationManager dm) 1209 throws SAXException 1210 { 1211 if (localName.equals("alphabet")) { 1212 // ignore 1213 } else if (localName.equals("symbol")) { 1214 String name = attrs.getValue("name"); 1215 dm.delegate(new SymbolHandler(name)); 1216 } else if (localName.equals("symbolref")) { 1217 String name = attrs.getValue("name"); 1218 LifeScienceIdentifier lsid = null; 1219 try { 1220 lsid = 1221 LifeScienceIdentifier.valueOf(name); 1222 } catch (LifeScienceIdentifierParseException ex) { 1223 throw new SAXException("Couldn't form a LSID from "+name); 1224 } 1225 Symbol sym = (Symbol) lsidToSymbol.get(lsid); 1226 if (sym == null) { 1227 throw new SAXException( 1228 "Reference to non-existent symbol " + name); 1229 } 1230 addSymbol(sym); 1231 } else if (localName.equals("characterTokenization")) { 1232 String name = attrs.getValue("name"); 1233 boolean caseSensitive = 
"true".equals(attrs.getValue("caseSensitive")); 1234 dm.delegate(new CharacterTokenizationHandler(name, alphaWrapper, lsidToSymbol, caseSensitive)); 1235 } else if (localName.equals("description")) { 1236 dm.delegate(new StringElementHandlerBase() { 1237 protected void setStringValue(String s) { 1238 try { 1239 alpha.getAnnotation().setProperty("description", s); 1240 } catch (ChangeVetoException ex) { 1241 throw new BioError( "Assertion failure: veto while modifying new Annotation", ex); 1242 } 1243 } 1244 } ); 1245 } else { 1246 throw new SAXException("Unknown element in alphabetl: " + localName); 1247 } 1248 } 1249 1250 public void endElement(String nsURI, 1251 String localName, 1252 String qName, 1253 StAXContentHandler delegate) 1254 throws SAXException 1255 { 1256 if (delegate instanceof SymbolHandler) { 1257 SymbolHandler sh = (SymbolHandler) delegate; 1258 //String name = sh.getName(); 1259 Symbol symbol = sh.getSymbol(); 1260 LifeScienceIdentifier lsid = sh.getLSID(); 1261 lsidToSymbol.put(lsid, symbol); 1262 addSymbol(symbol); 1263 } else if (delegate instanceof CharacterTokenizationHandler) { 1264 CharacterTokenizationHandler cth = (CharacterTokenizationHandler) delegate; 1265 String name = cth.getName(); 1266 SymbolTokenization toke = cth.getTokenization(); 1267 alpha.putTokenization(name, toke); 1268 } 1269 } 1270 1271 private void addSymbol(Symbol sym) 1272 throws SAXException 1273 { 1274 try { 1275 alpha.addSymbol(sym); 1276 } catch (ChangeVetoException cve) { 1277 throw new BioError( "Assertion failure: veto while modifying new Alphabet", cve); 1278 } catch (IllegalSymbolException ex) { 1279 throw new SAXException("IllegalSymbolException adding symbol to alphabet"); 1280 } 1281 } 1282 } 1283 1284 private class CharacterTokenizationHandler extends StAXContentHandlerBase { 1285 private String name; 1286 private Map localSymbols; 1287 private SymbolTokenization toke; 1288 private boolean isAlternate; 1289 1290 String getName() { 1291 return name; 1292 
} 1293 1294 SymbolTokenization getTokenization() { 1295 return toke; 1296 } 1297 1298 public CharacterTokenizationHandler(String name, 1299 FiniteAlphabet alpha, 1300 Map localSymbols, 1301 boolean caseSensitive) 1302 { 1303 1304 this.name = name; 1305 this.localSymbols = new HashMap(); 1306 for (Iterator i = alpha.iterator(); i.hasNext(); ) { 1307 WellKnownAtomicSymbol sym = (WellKnownAtomicSymbol) i.next(); 1308 this.localSymbols.put(sym.getIdentifier(), sym); 1309 } 1310 if(name.indexOf("alternate")==0) { 1311 toke = new AlternateTokenization(alpha, caseSensitive); 1312 isAlternate = true; 1313 } else 1314 toke = new CharacterTokenization(alpha, caseSensitive); 1315 } 1316 1317 public void startElement(String nsURI, 1318 String localName, 1319 String qName, 1320 Attributes attrs, 1321 DelegationManager dm) 1322 throws SAXException 1323 { 1324 if (localName.equals("characterTokenization")) { 1325 // ignore 1326 } else if (localName.equals("atomicMapping")) { 1327 dm.delegate(new MappingHandler(true)); 1328 } else if (localName.equals("ambiguityMapping")) { 1329 dm.delegate(new MappingHandler(false)); 1330 } else if (localName.equals("gapSymbolMapping")) { 1331 dm.delegate(new MappingHandler(false, true)); 1332 } else { 1333 throw new SAXException("Unknown element in characterTokenization: " + localName); 1334 } 1335 } 1336 1337 private class MappingHandler extends StAXContentHandlerBase { 1338 public MappingHandler(boolean isAtomic, boolean isPureGap) { 1339 this.isAtomic = isAtomic; 1340 this.isPureGap = isPureGap; 1341 } 1342 1343 public MappingHandler(boolean isAtomic) { 1344 this(isAtomic, false); 1345 } 1346 1347 boolean isAtomic; 1348 boolean isPureGap; 1349 Set symbols = new HashSet(); 1350 char c = '\0'; 1351 String str = ""; 1352 int level = 0; 1353 1354 public void startElement(String nsURI, 1355 String localName, 1356 String qName, 1357 Attributes attrs, 1358 DelegationManager dm) 1359 throws SAXException 1360 { 1361 if (level == 0) { 1362 c = 
attrs.getValue("token").charAt(0); 1363 if(isAlternate) 1364 str = attrs.getValue("token"); 1365 } else { 1366 if (localName.equals("symbolref")) { 1367 String name = attrs.getValue("name"); 1368 LifeScienceIdentifier lsid = null; 1369 try { 1370 lsid = LifeScienceIdentifier.valueOf(name); 1371 } catch (LifeScienceIdentifierParseException ex) { 1372 throw new SAXException("Cannot for LSID from " + name); 1373 } 1374 Symbol sym = (Symbol) localSymbols.get(lsid); 1375 if (sym == null) { 1376 throw new SAXException("Reference to non-existent symbol " + name); 1377 } 1378 symbols.add(sym); 1379 } else { 1380 throw new SAXException("Unknown element in mapping: " + localName); 1381 } 1382 } 1383 ++level; 1384 } 1385 1386 public void endElement(String nsURI, 1387 String localName, 1388 String qName, 1389 StAXContentHandler delegate) 1390 throws SAXException 1391 { 1392 --level; 1393 } 1394 1395 public void endTree() 1396 throws SAXException 1397 { 1398 Symbol ambiSym; 1399 if(isPureGap) { 1400 ambiSym = getGapSymbol(); 1401 } else { 1402 try { 1403 ambiSym = toke.getAlphabet().getAmbiguity(symbols); 1404 } catch (IllegalSymbolException ex) { 1405 throw (SAXException) 1406 new SAXException("IllegalSymbolException binding mapping for " + c).initCause(ex); 1407 } 1408 } 1409 if(isAlternate) 1410 ((AlternateTokenization)toke).bindSymbol(ambiSym, str); 1411 else 1412 ((CharacterTokenization)toke).bindSymbol(ambiSym, c); 1413 } 1414 } 1415 } 1416 } 1417 1418 private static class WellKnownTokenizationWrapper 1419 extends Unchangeable 1420 implements SymbolTokenization, Serializable 1421 { 1422 private String name; 1423 private Alphabet alphabet; 1424 private SymbolTokenization toke; 1425 1426 WellKnownTokenizationWrapper(Alphabet alpha, SymbolTokenization toke, String name) { 1427 super(); 1428 this.alphabet = alpha; 1429 this.name = name; 1430 this.toke = toke; 1431 } 1432 1433 public Alphabet getAlphabet() { 1434 return alphabet; 1435 } 1436 1437 public TokenType 
getTokenType() { 1438 return toke.getTokenType(); 1439 } 1440 1441 public StreamParser parseStream(SeqIOListener listener) { 1442 return toke.parseStream(listener); 1443 } 1444 1445 public Symbol parseToken(String s) 1446 throws IllegalSymbolException 1447 { 1448 return toke.parseToken(s); 1449 } 1450 1451 public String tokenizeSymbol(Symbol s) 1452 throws IllegalSymbolException 1453 { 1454 return toke.tokenizeSymbol(s); 1455 } 1456 1457 public String tokenizeSymbolList(SymbolList sl) 1458 throws IllegalAlphabetException, IllegalSymbolException 1459 { 1460 return toke.tokenizeSymbolList(sl); 1461 } 1462 1463 public Annotation getAnnotation() { 1464 return toke.getAnnotation(); 1465 } 1466 1467 public Object writeReplace() { 1468 return new OPH(getAlphabet().getName(), name); 1469 } 1470 1471 private static class OPH implements Serializable { 1472 private String alphaName; 1473 private String name; 1474 1475 OPH(String alphaName, String name) { 1476 this.alphaName = alphaName; 1477 this.name = name; 1478 } 1479 1480 private Object readResolve() throws ObjectStreamException { 1481 try { 1482 Alphabet alphabet = alphabetForName(alphaName); 1483 return alphabet.getTokenization(name); 1484 } catch (Exception ex) { 1485 throw new InvalidObjectException("Couldn't resolve tokenization " + name + " in alphabet " + alphaName); 1486 } 1487 } 1488 } 1489 } 1490 1491 /** 1492 * An alphabet contained WellKnownSymbols 1493 */ 1494 1495 private static class WellKnownAlphabet 1496 extends SimpleAlphabet 1497 { 1498 public WellKnownAlphabet() { 1499 super(); 1500 } 1501 1502 public WellKnownAlphabet(Set s) { 1503 super(s); 1504 } 1505 1506 protected Symbol getAmbiguityImpl(Set s) 1507 throws IllegalSymbolException 1508 { 1509 return getWellKnownAmbiguitySymbol(s); 1510 } 1511 } 1512 1513 /** 1514 * A wrapper which makes an Alphabet unchangable, and also fixes serialization 1515 */ 1516 1517 private static class ImmutableWellKnownAlphabetWrapper 1518 extends Unchangeable 1519 
implements FiniteAlphabet, Serializable 1520 { 1521 private FiniteAlphabet alpha; 1522 private Map tokenizationsByName = new HashMap(); 1523 1524 public ImmutableWellKnownAlphabetWrapper(FiniteAlphabet alpha) { 1525 super(); 1526 this.alpha = alpha; 1527 } 1528 1529 private Object writeReplace() { 1530 return new OPH(getName()); 1531 } 1532 1533 public SymbolTokenization getTokenization(String name) 1534 throws BioException 1535 { 1536 SymbolTokenization toke = (SymbolTokenization) tokenizationsByName.get(name); 1537 if (toke == null) { 1538 if ("name".equals(name)) { 1539 toke = new NameTokenization(this); 1540 } else { 1541 toke = new WellKnownTokenizationWrapper(this, alpha.getTokenization(name), name); 1542 } 1543 tokenizationsByName.put(name, toke); 1544 } 1545 return toke; 1546 } 1547 1548 /** 1549 * Placeholder for a WellKnownAlphabet in a serialized 1550 * object stream. 1551 */ 1552 1553 private static class OPH implements Serializable { 1554 private String name; 1555 1556 public OPH(String name) { 1557 this.name = name; 1558 } 1559 1560 private Object readResolve() throws ObjectStreamException { 1561 try { 1562 Alphabet a = AlphabetManager.alphabetForName(name); 1563 return a; 1564 } catch (NoSuchElementException ex) { 1565 throw new InvalidObjectException("Couldn't resolve alphabet " + name); 1566 } 1567 } 1568 } 1569 1570 public boolean contains(Symbol s) { 1571 return alpha.contains(s); 1572 } 1573 1574 public List getAlphabets() { 1575 return Collections.singletonList(this); 1576 } 1577 1578 public Symbol getAmbiguity(Set s) 1579 throws IllegalSymbolException 1580 { 1581 return alpha.getAmbiguity(s); 1582 } 1583 1584 public Symbol getGapSymbol() { 1585 return alpha.getGapSymbol(); 1586 } 1587 1588 public String getName() { 1589 return alpha.getName(); 1590 } 1591 1592 public Symbol getSymbol(List l) 1593 throws IllegalSymbolException 1594 { 1595 return alpha.getSymbol(l); 1596 } 1597 1598 public void validate(Symbol s) 1599 throws 
IllegalSymbolException 1600 { 1601 alpha.validate(s); 1602 } 1603 1604 public void addSymbol(Symbol s) 1605 throws ChangeVetoException 1606 { 1607 throw new ChangeVetoException("Can't add symbols to Well Known Alphabets"); 1608 } 1609 1610 public void removeSymbol(Symbol s) 1611 throws ChangeVetoException 1612 { 1613 throw new ChangeVetoException("Can't remove symbols from Well Known Alphabets"); 1614 } 1615 1616 public Iterator iterator() { 1617 return alpha.iterator(); 1618 } 1619 1620 public int size() { 1621 return alpha.size(); 1622 } 1623 1624 public Annotation getAnnotation() { 1625 return alpha.getAnnotation(); 1626 } 1627 } 1628 1629 1630 /** 1631 * A well-known gap. Resolved in serialized data 1632 */ 1633 private static class WellKnownGapSymbol extends AbstractSimpleBasisSymbol implements Serializable{ 1634 private SizeQueen sq; 1635 public WellKnownGapSymbol(List symList, SizeQueen sq) throws IllegalSymbolException{ 1636 super(Annotation.EMPTY_ANNOTATION, 1637 symList, 1638 Alphabet.EMPTY_ALPHABET); 1639 this.sq = sq; 1640 } 1641 1642 private Object readResolve() throws ObjectStreamException{ 1643 //System.out.println("ping!!"); 1644 return AlphabetManager.getGapSymbol(sq.getAlphas()); 1645 } 1646 } 1647 /** 1648 * A well-known symbol. Replaced by a placeholder in 1649 * serialized data. 
1650 */ 1651 1652 private static class WellKnownAtomicSymbol 1653 extends WellKnownBasisSymbol 1654 implements AtomicSymbol, Identifiable { 1655 1656 LifeScienceIdentifier lsid; 1657 1658 WellKnownAtomicSymbol(AtomicSymbol symbol, LifeScienceIdentifier lsid) { 1659 super(symbol); 1660 this.lsid = lsid; 1661 } 1662 1663 public LifeScienceIdentifier getIdentifier(){ 1664 return lsid; 1665 } 1666 1667 public Alphabet getMatches() { 1668 return new SingletonAlphabet(this); 1669 } 1670 1671 private Object writeReplace() { 1672 return new WellKnownAtomicSymbol.OPH(getIdentifier()); 1673 } 1674 1675 /** 1676 * Object Place Holder 1677 */ 1678 private static class OPH implements Serializable { 1679 private LifeScienceIdentifier name; 1680 1681 public OPH(LifeScienceIdentifier name) { 1682 this.name = name; 1683 } 1684 1685 private Object readResolve() throws ObjectStreamException { 1686 try { 1687 return symbolForLifeScienceID(name); 1688 } catch (NoSuchElementException ex) { 1689 throw new InvalidObjectException( 1690 "Couldn't resolve symbol:" + name 1691 ); 1692 } 1693 } 1694 } 1695 } 1696 1697 private static class WellKnownBasisSymbol 1698 extends Unchangeable 1699 implements BasisSymbol, Serializable 1700 { 1701 protected BasisSymbol symbol; 1702 private Set matches; 1703 1704 WellKnownBasisSymbol(BasisSymbol symbol) { 1705 super(); 1706 symbol.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN); // Immutable 1707 this.symbol = symbol; 1708 this.matches = new HashSet(); 1709 for (Iterator i = ((FiniteAlphabet) symbol.getMatches()).iterator(); i.hasNext(); ) { 1710 matches.add(i.next()); 1711 } 1712 } 1713 1714 Symbol getSymbol() { 1715 return symbol; 1716 } 1717 1718 public int hashCode() { 1719 return symbol.hashCode(); 1720 } 1721 1722 public boolean equals(Object o) { 1723 if (o instanceof WellKnownBasisSymbol) { 1724 return symbol.equals(((WellKnownBasisSymbol) o).getSymbol()); 1725 } else { 1726 return false; 1727 } 1728 } 1729 1730 public String 
getName() { 1731 return symbol.getName(); 1732 } 1733 1734 public Alphabet getMatches() { 1735 return symbol.getMatches(); 1736 } 1737 1738 public List getSymbols() { 1739 return Collections.singletonList(this); 1740 } 1741 1742 public Annotation getAnnotation() { 1743 return symbol.getAnnotation(); 1744 } 1745 1746 private Object writeReplace() { 1747 return new OPH(matches); 1748 } 1749 1750 1751 private static class OPH implements Serializable { 1752 private Set matches; 1753 1754 public OPH(Set matches) { 1755 OPH.this.matches = matches; 1756 } 1757 1758 private Object readResolve() /* throws ObjectStreamException */ { 1759 return getWellKnownAmbiguitySymbol(matches); 1760 } 1761 } 1762 } 1763 1764 /** 1765 * <p> 1766 * The class representing the Gap symbol. 1767 * </p> 1768 * 1769 * <p> 1770 * The gap is quite special. It is an ambiguity symbol with an empty alphabet. 1771 * This means that it notionaly represents an unfilled slot in a sequence. 1772 * It should be a singleton, hence the 1773 * placement in AlphabetManager and also the method normalize. 1774 * </p> 1775 * 1776 * @author Matthew Pocock 1777 */ 1778 private static class GapSymbol 1779 extends 1780 Unchangeable 1781 implements 1782 Symbol, 1783 Serializable 1784 { 1785 public GapSymbol() { 1786 } 1787 1788 public String getName() { 1789 return "gap"; 1790 } 1791 1792 public char getToken() { 1793 return '-'; 1794 } 1795 1796 public Annotation getAnnotation() { 1797 return Annotation.EMPTY_ANNOTATION; 1798 } 1799 1800 public Alphabet getMatches() { 1801 return Alphabet.EMPTY_ALPHABET; 1802 } 1803 1804 1805 private Object readResolve() throws ObjectStreamException { 1806 return AlphabetManager.getGapSymbol(); 1807 } 1808 } 1809 1810 1811 /** 1812 * Get an indexer for a specified alphabet. 1813 * 1814 * @param alpha The alphabet to index 1815 * @return an AlphabetIndex instance 1816 * @since 1.1 1817 */ 1818 1819 /** 1820 * Get an indexer for a specified alphabet. 
1821 * 1822 * @param alpha The alphabet to index 1823 * @return an AlphabetIndex instance 1824 * @since 1.1 1825 */ 1826 public static AlphabetIndex getAlphabetIndex( 1827 FiniteAlphabet alpha 1828 ) { 1829 final int generateIndexSize = 160; 1830 AlphabetIndex ai = (AlphabetIndex) alphabetToIndex.get(alpha); 1831 if(ai == null) { 1832 int size = alpha.size(); 1833 if(size <= generateIndexSize) { 1834 ai = new LinearAlphabetIndex(alpha); 1835 } else { 1836 if(alpha.getAlphabets().size() > 1) { 1837 ai = new CrossProductAlphabetIndex(alpha); 1838 } else { 1839 ai = new HashedAlphabetIndex(alpha); 1840 } 1841 } 1842 alphabetToIndex.put(alpha, ai); 1843 } 1844 return ai; 1845 } 1846 1847 /** 1848 * Get an indexer for an array of symbols. 1849 * 1850 * @param syms the Symbols to index in that order 1851 * @return an AlphabetIndex instance 1852 * @since 1.1 1853 */ 1854 public static AlphabetIndex getAlphabetIndex ( 1855 Symbol[] syms 1856 ) throws IllegalSymbolException, BioException { 1857 return new LinearAlphabetIndex(syms); 1858 } 1859 1860 private static final class SizeQueen extends AbstractList implements Serializable{ 1861 private final List alphas; 1862 1863 public SizeQueen(List alphas) { 1864 this.alphas = alphas; 1865 } 1866 1867 public int size() { 1868 return alphas.size(); 1869 } 1870 1871 public List getAlphas(){ 1872 return this.alphas; 1873 } 1874 1875 public Object get(int pos) { 1876 Alphabet a = (Alphabet) alphas.get(pos); 1877 List al = a.getAlphabets(); 1878 int size = al.size(); 1879 if(size > 1) { 1880 return new SizeQueen(al); 1881 } else { 1882 return new Integer(size); 1883 } 1884 } 1885 } 1886 1887 private static Symbol getWellKnownAmbiguitySymbol(Set s) { 1888 Symbol sym = (Symbol) ambiguitySymbols.get(s); 1889 if (sym == null) { 1890 SimpleAlphabet matchAlpha = new WellKnownAlphabet(s); 1891 sym = new WellKnownBasisSymbol(new SimpleBasisSymbol(Annotation.EMPTY_ANNOTATION, matchAlpha)); 1892 ambiguitySymbols.put(new HashSet(s), sym); 1893 } 
1894 return sym; 1895 } 1896}