001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.symbol;
024
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.InvalidObjectException;
028import java.io.ObjectStreamException;
029import java.io.Serializable;
030import java.util.AbstractList;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.HashMap;
035import java.util.HashSet;
036import java.util.Iterator;
037import java.util.List;
038import java.util.Map;
039import java.util.NoSuchElementException;
040import java.util.Set;
041import java.util.WeakHashMap;
042
043import javax.xml.parsers.ParserConfigurationException;
044import javax.xml.parsers.SAXParserFactory;
045
046import org.biojava.bio.Annotation;
047import org.biojava.bio.BioError;
048import org.biojava.bio.BioException;
049import org.biojava.bio.SmallAnnotation;
050import org.biojava.bio.seq.io.AlternateTokenization;
051import org.biojava.bio.seq.io.CharacterTokenization;
052import org.biojava.bio.seq.io.NameTokenization;
053import org.biojava.bio.seq.io.SeqIOListener;
054import org.biojava.bio.seq.io.StreamParser;
055import org.biojava.bio.seq.io.SymbolTokenization;
056import org.biojava.utils.ChangeListener;
057import org.biojava.utils.ChangeType;
058import org.biojava.utils.ChangeVetoException;
059import org.biojava.utils.ClassTools;
060import org.biojava.utils.Unchangeable;
061import org.biojava.utils.cache.WeakValueHashMap;
062import org.biojava.utils.lsid.Identifiable;
063import org.biojava.utils.lsid.LifeScienceIdentifier;
064import org.biojava.utils.lsid.LifeScienceIdentifierParseException;
065import org.biojava.utils.stax.DelegationManager;
066import org.biojava.utils.stax.SAX2StAXAdaptor;
067import org.biojava.utils.stax.StAXContentHandler;
068import org.biojava.utils.stax.StAXContentHandlerBase;
069import org.biojava.utils.stax.StringElementHandlerBase;
070import org.xml.sax.Attributes;
071import org.xml.sax.InputSource;
072import org.xml.sax.SAXException;
073import org.xml.sax.XMLReader;
074
075
076/**
077 * Utility methods for working with Alphabets.  Also acts as a registry for
078 * well-known alphabets.
079 *
080 * <p>
081 * The alphabet interfaces themselves don't give you a lot of help in actually
082 * getting an alphabet instance. This is where the AlphabetManager comes in
083 * handy. It helps out in serialization, generating derived alphabets and
084 * building CrossProductAlphabet instances. It also contains limited support for
085 * parsing complex alphabet names back into the alphabets.
086 * </p>
087 *
088 * @author Matthew Pocock
089 * @author Thomas Down
090 * @author Mark Schreiber
091 * @author George Waldon (alternate tokenization)
092 */
093
094public final class AlphabetManager {
  // Registry of alphabets keyed by name; filled by the static initializer
  // and by registerAlphabet().
  static private Map nameToAlphabet;
  //static private Map nameToSymbol;
  // Symbols keyed by LifeScienceIdentifier; consulted by symbolForName()
  // and symbolForLifeScienceID().
  static private Map lsidToSymbol;
  // Cache of cross-product alphabets keyed by their list of component
  // alphabets; created lazily in getCrossProductAlphabet(List, Alphabet).
  static private Map crossProductAlphabets;
  // Seeded in the static initializer with (empty set -> gap symbol).
  static private Map ambiguitySymbols;
  // The single system-wide gap symbol returned by getGapSymbol().
  static private GapSymbol gapSymbol;
  // Gap symbols keyed by a SizeQueen built from a list of alphabets;
  // used by getGapSymbol(List).
  static private Map gapBySize;
  // NOTE(review): not referenced in the visible part of this file;
  // presumably maps alphabets to their index objects - confirm.
  static private Map alphabetToIndex = new WeakHashMap();
  // Cache from a list of symbols to the Symbol created for that list
  // (see readFromCache / writeToCache).
  static private Map symListToSymbol;
104    
105  /**
106   * <p>
107   * Initialize the static AlphabetManager resources.
108   * </p>
109   *
110   * <p>
111   * This parses the resource
112   * <code>org/biojava/bio/seq/tools/AlphabetManager.xml</code>
113   * and builds a basic set of alphabets.
114   * </p>
115   */
116  static {
117    nameToAlphabet = new HashMap();
118    //nameToSymbol = new HashMap();
119    lsidToSymbol = new HashMap();
120    ambiguitySymbols = new HashMap();
121
122    gapSymbol = new GapSymbol();
123    gapBySize = new HashMap();
124    gapBySize.put(new SizeQueen(new ArrayList()), gapSymbol);
125
126    nameToAlphabet.put("INTEGER", IntegerAlphabet.getInstance());
127    nameToAlphabet.put("DOUBLE", DoubleAlphabet.getInstance());
128
129    symListToSymbol = new WeakValueHashMap();
130
131    try {
132      SizeQueen sq = new SizeQueen(Arrays.asList(
133                new Alphabet[] { DoubleAlphabet.getInstance() }));  
134      gapBySize.put(sq, 
135                    new WellKnownGapSymbol(
136                         Arrays.asList(new Symbol[] { gapSymbol}), sq));
137    } catch (IllegalSymbolException ise) {
138      throw new BioError(
139
140        "Assertion Failure: Should be able to make gap basis", ise
141      );
142    }
143
144    ambiguitySymbols.put(new HashSet(), gapSymbol);
145    try {
146      InputStream alphabetStream = ClassTools.getClassLoader(AlphabetManager.class).getResourceAsStream(
147        "org/biojava/bio/symbol/AlphabetManager.xml"
148      );
149      if (alphabetStream == null) {
150          throw new BioError("Couldn't locate AlphabetManager.xml.  This probably means that your biojava.jar file is corrupt or incorrectly built.");
151      }
152      InputSource is = new InputSource(alphabetStream);
153      loadAlphabets(is);
154    } catch (Exception t) {
155      throw new BioError( "Unable to initialize AlphabetManager", t);
156    }
157  }
158
    /**
   * Singleton instance, created on the first call to instance() and
   * reused by every later call.
   */
  static private AlphabetManager am;
163
164  /**
165   * Retrieve the singleton instance.
166   *
167   * @return the AlphabetManager instance
168   * @deprecated all AlphabetManager methods have become static
169   */
170  static public AlphabetManager instance() {
171    if(am == null)
172      am = new AlphabetManager();
173    return am;
174  }
175
176  
177    /**
178     * Return the ambiguity symbol which matches all symbols in
179     * a given alphabet.
180     * @since 1.2
181     * @param alpha The alphabet
182     * @return the ambiguity symbol
183     */
184
185    public static Symbol getAllAmbiguitySymbol(FiniteAlphabet alpha) {
186        Set allSymbols = new HashSet();
187        for (Iterator i = alpha.iterator(); i.hasNext(); ) {
188            allSymbols.add(i.next());
189        }
190        try {
191            return alpha.getAmbiguity(allSymbols);
192        } catch (IllegalSymbolException ex) {
193            throw new BioError( "Assertion failure: coudn't recover all-ambiguity symbol", ex);
194        }
195    }
196
197    /**
198     * Return a set containing all possible symbols which can be
199     * considered members of a given alphabet, including ambiguous
200     * symbols.  Warning, this method can return large sets!
201     * @since 1.2
202     * @param alpha The alphabet
203     * @return The set of symbols that are members of <code>alpha</code>
204     */
205
206    public static Set getAllSymbols(FiniteAlphabet alpha) {
207        Set allSymbols = new HashSet();
208        List orderedAlpha = new ArrayList(alpha.size());
209        for (Iterator i = alpha.iterator(); i.hasNext(); ) {
210            orderedAlpha.add(i.next());
211        }
212
213        int atomicSyms = alpha.size();
214        int totalSyms = 1 << atomicSyms;
215
216        for (int cnt = 0; cnt < totalSyms; ++cnt) {
217            Set matchSet = new HashSet();
218            for (int atom = 0; atom < atomicSyms; ++atom) {
219                if ((cnt & (1 << atom)) != 0) {
220                    matchSet.add(orderedAlpha.get(atom));
221                }
222            }
223
224            try {
225                allSymbols.add(alpha.getAmbiguity(matchSet));
226            } catch (IllegalSymbolException ex) {
227                throw new BioError( "Assertion failed: couldn't get ambiguity symbol", ex);
228            }
229        }
230
231        return allSymbols;
232    }
233
234
235
236  /**
237   * Retrieve the alphabet for a specific name.
238   *
239   * @param name the name of the alphabet
240   * @return the alphabet object
241   * @throws NoSuchElementException if there is no alphabet by that name
242   */
243  static public Alphabet alphabetForName(String name)
244  throws NoSuchElementException{
245    Alphabet alpha = (Alphabet) nameToAlphabet.get(name);
246    if(alpha == null) {
247      if(name.startsWith("(") && name.endsWith(")")) {
248        alpha = generateCrossProductAlphaFromName(name);
249      } else {
250        throw new NoSuchElementException(
251          "No alphabet for name " + name + " could be found"
252        );
253      }
254    }
255    return alpha;
256  }
257   /**
258   * Retrieve the symbol represented a String object
259   * @deprecated use symbolForLifeScienceID() instead
260   * @param name of the string whose symbol you want to get
261   * @throws NoSuchElementException if the string name is invalid.
262   * @return The Symbol
263   */
264  static public Symbol symbolForName(String name)
265  throws NoSuchElementException {
266    String ls = "urn:lsid:biojava.org:symbol:"+name;
267    LifeScienceIdentifier lsid = null;
268    try {
269      lsid = LifeScienceIdentifier.valueOf(ls);
270    } catch (LifeScienceIdentifierParseException ex) {
271      throw new BioError("Cannot construct LSID for "+name, ex);
272    }
273    Symbol s = (Symbol) lsidToSymbol.get(lsid);
274    if(s == null) {
275      throw new NoSuchElementException("Could not find symbol under the name " + lsid);
276    }
277    return s;
278  }
279
280  /**
281   * Retreives the Symbol for the LSID
282   * @param lsid the URN for the Symbol
283   * @return a reference to the Symbol
284   */
285  static public Symbol symbolForLifeScienceID(LifeScienceIdentifier lsid){
286    return (Symbol)lsidToSymbol.get(lsid);
287  }
288
289  /**
290   * Register an alphabet by name.
291   *
292   * @param name  the name by which it can be retrieved
293   * @param alphabet the Alphabet to store
294   */
295  static public void registerAlphabet(String name, Alphabet alphabet) {
296    nameToAlphabet.put(name, alphabet);
297    if(alphabet instanceof AbstractAlphabet){ //this might be needed for serialization
298          ((AbstractAlphabet)alphabet).setRegistered(true);
299    }
300  }
301  
302  /**
303   * Register and Alphabet by more than one name. This allows aliasing
304   * of an alphabet with two or more names. It is equivalent to calling
305   * <code>registerAlphabet(String name, Alphabet alphabet)</code> several
306   * times.
307   *
308   * @since 1.4
309   * @param names  the names by which it can be retrieved
310   * @param alphabet the Alphabet to store
311   */
312  static public void registerAlphabet(String[] names, Alphabet alphabet){
313      for(int i = 0; i < names.length; i++){
314          registerAlphabet(names[i], alphabet);
315      }
316  }
317  
318  /**
319   * A set of names under which Alphabets have been registered.
320   * @return a <code>Set</code> of <code>Strings</code>
321   */
322  static public Set registrations(){
323      return Collections.unmodifiableSet(nameToAlphabet.keySet());
324  }
325
326  /**
327   * Has an Alphabet been registered by that name
328   * @param name the name of the alphabet
329   * @return true if it has or false otherwise
330   */
331  static public boolean registered(String name){
332    return nameToAlphabet.containsKey(name);
333  }
334
335  /**
336   * Get an iterator over all alphabets known.
337   *
338   * @return an Iterator over Alphabet objects
339   */
340  static public Iterator alphabets() {
341    return Collections.unmodifiableCollection(nameToAlphabet.values()).iterator();
342  }
343
344  /**
345   * <p>
346   * Get the special `gap' Symbol.
347   * </p>
348   *
349   * <p>
350   * The gap symbol is a Symbol that has an empty alphabet of matches. As such
351   *, ever alphabet contains gap, as there is no symbol that matches gap, so
352   * there is no case where an alphabet doesn't contain a symbol that matches
353   * gap.
354   * </p>
355   *
356   * <p>
357   * Gap can be thought of as an empty sub-space within the space of all
358   * possible symbols. If you are working in a cross-product alphabet, you
359   * should chose whether to use gap to represent 'no symbol', or a basis symbol
360   * of the appropriate size built entirely of gaps to represent 'no symbol in
361   * each of the slots'. Perhaps this could be explained better.
362   * </p>
363   *
364   * @return the system-wide symbol that represents a gap
365   */
366  static public Symbol getGapSymbol() {
367    return gapSymbol;
368  }
369
370  /**
371   * <p>
372   * Get the gap symbol appropriate to this list of alphabets.
373   * </p>
374   *
375   * <p>
376   * The gap symbol with have the same shape a the alphabet list. It will be as
377   * long as the list, and if any of the alphabets in the list have a dimension
378   * greater than 1, it will also insert the appropriate gap there.
379   * </p>
380   *
381   * @param alphas  List of alphabets
382   * @return the appropriate gap symbol for the alphabet list
383   */
384  static public Symbol getGapSymbol(List alphas) {
385    SizeQueen sq = new SizeQueen(alphas);
386    Symbol s = (Symbol) gapBySize.get(sq);
387
388    if(s == null) {
389      if(alphas.size() == 0) { // should never be needed
390        s = gapSymbol;
391      } else if(alphas.size() == 1) { // should never happen
392        Alphabet a = (Alphabet) alphas.get(0);
393        s = getGapSymbol(a.getAlphabets());
394      } else {
395        List symList = new ArrayList(alphas.size());
396        for(Iterator i = alphas.iterator(); i.hasNext(); ) {
397          Alphabet a = (Alphabet) i.next();
398          symList.add(getGapSymbol(a.getAlphabets()));
399        }
400        try {
401          s = new WellKnownGapSymbol(symList, sq);
402        } catch (IllegalSymbolException ise) {
403          throw new BioError(
404            "Assertion Failure: Should be able to make gap basis", ise
405          );
406        }
407      }
408      gapBySize.put(sq, s);
409    }
410
411    return s;
412  }
413  
414  
415
416  /**
417   * <p>
418   * Generate a new AtomicSymbol instance with a name and Annotation.
419   * </p>
420   *
421   * <p>
422   * Use this method if you wish to create an AtomicSymbol instance. Initially it
423   * will not be a member of any alphabet.
424   * </p>
425   *
426   * @param name  the String returned by getName()
427   * @param annotation the Annotation returned by getAnnotation()
428   * @return a new AtomicSymbol instance
429   */
430  static public AtomicSymbol createSymbol(
431    String name, Annotation annotation
432  ) {
433    AtomicSymbol as = new FundamentalAtomicSymbol(name, annotation);
434    return as;
435  }
436
437  /**
438   * <p>
439   * Generate a new AtomicSymbol instance with a name and an Empty Annotation.
440   * </p>
441   *
442   * <p>
443   * Use this method if you wish to create an AtomicSymbol instance. Initially it
444   * will not be a member of any alphabet.
445   * </p>
446   *
447   * @param name  the String returned by getName()
448   * @return a new AtomicSymbol instance
449   */
450  static public AtomicSymbol createSymbol(
451      String name
452      ) {
453    AtomicSymbol as = new FundamentalAtomicSymbol(name, Annotation.EMPTY_ANNOTATION);
454    return as;
455  }
456
457  /**
458   * <p>
459   * Generate a new AtomicSymbol instance with a token, name and Annotation.
460   * </p>
461   *
462   * <p>
463   * Use this method if you wish to create an AtomicSymbol instance. Initially it
464   * will not be a member of any alphabet.
465   * </p>
466   *
467   * @param token  the Char token returned by getToken() (ignpred as of BioJava 1.2)
468   * @param name  the String returned by getName()
469   * @param annotation the Annotation returned by getAnnotation()
470   * @return a new AtomicSymbol instance
471   * @deprecated Use the two-arg version of this method instead.
472   */
473  static public AtomicSymbol createSymbol(
474    char token, String name, Annotation annotation
475  ) {
476    AtomicSymbol as = new FundamentalAtomicSymbol(name, annotation);
477    return as;
478  }
479
480  /**
481   * <p>
482   * Generates a new Symbol instance that represents the tuple of Symbols in
483   * symList.
484   * </p>
485   * 
486   * <p>
487   * This method is most useful for writing Alphabet implementations. It should
488   * not be invoked by casual users. Use alphabet.getSymbol(List) instead.
489   * </p>
490   * @return a Symbol that encapsulates that List
491   * @deprecated use the new version, without the token argument
492   * @param annotation The annotation bundle for the symbol
493   * @param token the Symbol's token [ignored since 1.2]
494   * @param symList a list of Symbol objects
495   * @param alpha the Alphabet that this Symbol will reside in
496   * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made
497   */
498  static public Symbol createSymbol(
499    char token, Annotation annotation,
500    List symList, Alphabet alpha
501  ) throws IllegalSymbolException {
502      return createSymbol(annotation, symList, alpha);
503  }
504
505  static private Symbol readFromCache(List symList)
506  {
507    //System.out.println("Reading symbol: " + symList + " -> " + symListToSymbol.get(symList));
508    return (Symbol) symListToSymbol.get(symList);
509  }
510
511  static private void writeToCache(List symList, Symbol sym)
512  {
513    //System.out.println("Writing symbol: " + symList + " -> " + sym);
514    symListToSymbol.put(new ArrayList(symList), sym);
515  }
516
517  /**
518   * <p>
519   * Generates a new Symbol instance that represents the tuple of Symbols in
520   * symList. This will attempt to return the same symbol for the same list.
521   * </p>
522   * 
523   * <p>
524   * This method is most useful for writing Alphabet implementations. It should
525   * not be invoked by casual users. Use alphabet.getSymbol(List) instead.
526   * </p>
527   * @return a Symbol that encapsulates that List
528   * @param annotation The annotation bundle for the Symbol
529   * @param symList a list of Symbol objects
530   * @param alpha the Alphabet that this Symbol will reside in
531   * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made
532   */
533  static public Symbol createSymbol(
534    Annotation annotation,
535    List symList, Alphabet alpha)
536          throws IllegalSymbolException
537  {
538    Symbol cs = readFromCache(symList);
539    if(cs != null) {
540      return cs;
541    }
542
543    Iterator i = symList.iterator();
544    int basis = 0;
545    int atomC = 0;
546    int gaps = 0;
547    while(i.hasNext()) {
548      Symbol s = (Symbol) i.next();
549      if(s instanceof BasisSymbol) {
550        basis++;
551        if(s instanceof AtomicSymbol) {
552          atomC++;
553        }
554      } else {
555        Alphabet matches = s.getMatches();
556        if(matches instanceof FiniteAlphabet) {
557          if(((FiniteAlphabet) matches).size() == 0) {
558            gaps++;
559          }
560        }
561      }
562    }
563
564    try {
565      if(atomC == symList.size()) {
566        Symbol sym = new SimpleAtomicSymbol(annotation, symList);
567        writeToCache(symList, sym);
568        return sym;
569      } else if((gaps + basis) == symList.size()) {
570        Symbol sym = new SimpleBasisSymbol(
571                annotation,
572                symList,
573                new SimpleAlphabet(
574                        expandMatches(alpha, symList, new ArrayList())));
575        writeToCache(symList, sym);
576        return sym;
577      } else {
578        Symbol sym = new SimpleSymbol(
579                annotation,
580                new SimpleAlphabet(
581                        expandBasis(alpha, symList, new ArrayList())));
582        writeToCache(symList,  sym);
583        return sym;
584      }
585    } catch (IllegalSymbolException ise) {
586      throw new IllegalSymbolException(
587              ise,
588              "Could not create a new symbol with: " +
589              annotation + "\t" +
590              symList + "\t" +
591              alpha);
592    }
593  }
594
595  /**
596   * Expands a list of BasisSymbols into the set of AtomicSymbol instances
597   * it matches.
598   */
599  private static Set expandBasis(Alphabet alpha, List symList, List built) {
600    int indx = built.size();
601    if(indx < symList.size()) {
602      Symbol s = (Symbol) symList.get(indx);
603      if(s instanceof AtomicSymbol) {
604        built.add(s);
605        return expandBasis(alpha, symList, built);
606      } else {
607        Set res = new HashSet();
608        Iterator i = ((FiniteAlphabet) s.getMatches()).iterator();
609        while(i.hasNext()) {
610          AtomicSymbol as = (AtomicSymbol) i.next();
611          List built2 = new ArrayList(built);
612          built2.add(as);
613          res.addAll(expandBasis(alpha, symList, built2));
614        }
615        return res;
616      }
617    } else {
618      try {
619        return Collections.singleton(alpha.getSymbol(built));
620      } catch (IllegalSymbolException ise) {
621        throw new BioError(
622          "Assertion Failure: Should just have legal AtomicSymbol instances.", ise
623        );
624      }
625    }
626  }
627
628  /**
629   * <p>
630   * Generates a new Symbol instance that represents the tuple of Symbols in
631   * symList.
632   * </p>
633   * 
634   * <p>
635   * This method is most useful for writing Alphabet implementations. It should
636   * not be invoked by users. Use alphabet.getSymbol(Set) instead.
637   * </p>
638   * @return a Symbol that encapsulates that List
639   * @deprecated use the three-arg version of this method instead.
640   * @param token the Symbol's token [ignored since 1.2]
641   * @param annotation the Symbol's Annotation
642   * @param symSet a Set of Symbol objects
643   * @param alpha the Alphabet that this Symbol will reside in
644   * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made
645   */
646  static public Symbol createSymbol(
647    char token, Annotation annotation,
648    Set symSet, Alphabet alpha
649  ) throws IllegalSymbolException {
650      return createSymbol(annotation, symSet, alpha);
651  }
652
653  /**
654   * <p>
655   * Generates a new Symbol instance that represents the tuple of Symbols in
656   * symList.
657   * </p>
658   * 
659   * <p>
660   * This method is most useful for writing Alphabet implementations. It should
661   * not be invoked by users. Use alphabet.getSymbol(Set) instead.
662   * </p>
663   * @return a Symbol that encapsulates that List
664   * @param annotation the Symbol's Annotation
665   * @param symSet a Set of Symbol objects
666   * @param alpha the Alphabet that this Symbol will reside in
667   * @throws org.biojava.bio.symbol.IllegalSymbolException If the Symbol cannot be made
668   */
669  static public Symbol createSymbol(
670    Annotation annotation,
671    Set symSet, Alphabet alpha
672  ) throws IllegalSymbolException {
673    if(symSet.size() == 0) {
674      return getGapSymbol();
675    }
676    Set asSet = new HashSet();
677    int len = -1;
678    for(
679      Iterator i = symSet.iterator();
680      i.hasNext();
681    ) {
682      Symbol s = (Symbol) i.next();
683      if(s instanceof AtomicSymbol) {
684        AtomicSymbol as = (AtomicSymbol) s;
685        int l = as.getSymbols().size();
686        if(len == -1) {
687          len = l;
688        } else if(len != l) {
689          throw new IllegalSymbolException(
690            "Can't build ambiguity symbol as the symbols have inconsistent " +
691            "length"
692          );
693        }
694        asSet.add(as);
695      } else {
696        for(Iterator j = ((FiniteAlphabet) s.getMatches()).iterator();
697          j.hasNext();
698        ) {
699          AtomicSymbol as = ( AtomicSymbol) j.next();
700          int l = as.getSymbols().size();
701          if(len == -1) {
702            len = l;
703          } else if(len != l) {
704            throw new IllegalSymbolException(
705              "Can't build ambiguity symbol as the symbols have inconsistent " +
706              "length"
707            );
708          }
709          asSet.add(as);
710        }
711      }
712    }
713    if(asSet.size() == 0) {
714      return getGapSymbol();
715    } else if(asSet.size() == 1) {
716      return (Symbol) asSet.iterator().next();
717    } else {
718      if(len == 1) {
719        return new SimpleBasisSymbol(
720          annotation, new SimpleAlphabet(asSet)
721        );
722      } else {
723        List fs = factorize(alpha, asSet);
724        if(fs == null) {
725          return new SimpleSymbol(
726            annotation,
727            new SimpleAlphabet(asSet)
728          );
729        } else {
730          return new SimpleBasisSymbol(
731            annotation,
732            fs, new SimpleAlphabet(
733              expandBasis(alpha, fs, new ArrayList())
734            )
735          );
736        }
737      }
738    }
739  }
740
741  /**
742   * Generates a new CrossProductAlphabet from the give name.
743   *
744   * @param name  the name to parse
745   * @return the associated Alphabet
746   */
747  static public Alphabet generateCrossProductAlphaFromName(
748    String name
749  ) {
750    if(!name.startsWith("(") || !name.endsWith(")")) {
751      throw new BioError(
752        "Can't parse " + name +
753        " into a cross-product alphabet as it is not bracketed"
754      );
755    }
756
757    name = name.substring(1, name.length()-1).trim();
758    List aList = new ArrayList(); // the alphabets
759    int i = 0;
760    while(i < name.length()) {
761      if(name.charAt(i) == '(') {
762        int depth = 1;
763        int j = i+1;
764        while(j < name.length() && depth > 0) {
765          char c = name.charAt(j);
766          if(c == '(') {
767            depth++;
768          } else if(c == ')') {
769            depth--;
770          }
771          j++;
772        }
773        if(depth == 0) {
774          aList.add(alphabetForName(name.substring(i, j)));
775          i = j;
776        } else {
777          throw new BioError(
778            "Error parsing alphabet name: could not find matching bracket\n" +
779            name.substring(i)
780          );
781        }
782      } else {
783        int j = name.indexOf(" x ", i);
784        if(j < 0) {
785          aList.add(alphabetForName(name.substring(i).trim()));
786          i = name.length();
787        } else {
788          if(i != j){
789            aList.add(alphabetForName(name.substring(i, j).trim()));
790          }
791          i = j + " x ".length();
792        }
793      }
794    }
795
796    return getCrossProductAlphabet(aList);
797  }
798
799  /**
800   * <p>
801   * Retrieve a CrossProductAlphabet instance over the alphabets in aList.
802   * </p>
803   * 
804   * <p>
805   * If all of the alphabets in aList implements FiniteAlphabet then the
806   * method will return a FiniteAlphabet. Otherwise, it returns a non-finite
807   * alphabet.
808   * </p>
809   * 
810   * <p>
811   * If you call this method twice with a list containing the same alphabets,
812   * it will return the same alphabet. This promotes the re-use of alphabets
813   * and helps to maintain the 'flyweight' principal for finite alphabet
814   * symbols.
815   * </p>
816   * 
817   * <p>
818   * The resulting alphabet cpa will be retrievable via
819   * AlphabetManager.alphabetForName(cpa.getName())
820   * </p>
821   * @param aList a list of Alphabet objects
822   * @return a CrossProductAlphabet that is over the alphabets in aList
823   */
824  static public Alphabet getCrossProductAlphabet(List aList) {
825    return getCrossProductAlphabet(aList, (Alphabet) null);
826  }
827
828  
829  /**
830   * Attempts to create a cross product alphabet and register it under a name.
831   * @param aList A list of alphabets
832   * @param name The name which the new alphabet will be registered under.
833   * @throws org.biojava.bio.symbol.IllegalAlphabetException If the Alphabet cannot be made or a different 
834   * alphabet is already registed under this name.
835   * @return The CrossProductAlphabet
836   */
837  static public Alphabet getCrossProductAlphabet(List aList, String name)
838  throws IllegalAlphabetException {
839    Alphabet currentAlpha = (Alphabet) nameToAlphabet.get(name);
840    if(currentAlpha != null) {
841      if(currentAlpha.getAlphabets().equals(aList)) {
842        return currentAlpha;
843      } else {
844        throw new IllegalAlphabetException(name + " already registered");
845      }
846    } else {
847      Alphabet alpha = getCrossProductAlphabet(aList);
848      registerAlphabet(name, alpha);
849      return alpha;
850    }
851  }
852
853  /**
854   * <p>
855   * Retrieve a CrossProductAlphabet instance over the alphabets in aList.
856   * </p>
857   *
858   * <p>
859   * This method is most usefull for implementors of cross-product alphabets,
860   * allowing them to safely build the matches alphabets for ambiguity symbols.
861   * </p>
862   *
863   * <p>
864   * If all of the alphabets in aList implements FiniteAlphabet then the
865   * method will return a FiniteAlphabet. Otherwise, it returns a non-finite
866   * alphabet.
867   * </p>
868   *
869   * <p>
870   * If you call this method twice with a list containing the same alphabets,
871   * it will return the same alphabet. This promotes the re-use of alphabets
872   * and helps to maintain the 'flyweight' principal for finite alphabet
873   * symbols.
874   * </p>
875   *
876   * <p>
877   * The resulting alphabet cpa will be retrievable via
878   * AlphabetManager.alphabetForName(cpa.getName())
879   * </p>
880   *
881   * @param aList a list of Alphabet objects
882   * @param parent a parent alphabet
883   * @return a CrossProductAlphabet that is over the alphabets in aList
884   */
885  static public Alphabet getCrossProductAlphabet(
886    List aList, Alphabet parent
887  ) {
888    if(aList.size() == 0) {
889      return Alphabet.EMPTY_ALPHABET;
890    }
891
892    // This trap means that the `product' operator can be
893    // safely applied to a single alphabet.
894
895    if (aList.size() == 1)
896        return (Alphabet) aList.get(0);
897
898    if(crossProductAlphabets == null) {
899      crossProductAlphabets = new HashMap();
900    }
901
902    Alphabet cpa = (Alphabet) crossProductAlphabets.get(aList);
903
904    int size = 1;
905    if(cpa == null) {
906      for(Iterator i = aList.iterator(); i.hasNext(); ) {
907        Alphabet aa = (Alphabet) i.next();
908        if(! (aa instanceof FiniteAlphabet) ) {
909          cpa =  new InfiniteCrossProductAlphabet(aList);
910          break;
911        }
912        if(size <= 1000) {
913          size *= ((FiniteAlphabet) aa).size();
914        }
915      }
916      if(cpa == null) {
917        try {
918          if(size > 0 && size < 1000) {
919            cpa = new SimpleCrossProductAlphabet(aList, parent);
920          } else {
921            cpa = new SparseCrossProductAlphabet(aList);
922          }
923        } catch (IllegalAlphabetException iae) {
924          throw new BioError(
925            "Could not create SimpleCrossProductAlphabet for " + aList +
926            " even though we should be able to. No idea what is wrong."
927          );
928        }
929      }
930      crossProductAlphabets.put(new ArrayList(aList), cpa);
931      registerAlphabet(cpa.getName(), cpa);
932    }
933
934    return cpa;
935  }
936
937  private static Set expandMatches(Alphabet parent, List symList, List built) {
938    int indx = built.size();
939    if(indx < symList.size()) {
940      Symbol bs = (Symbol) symList.get(indx);
941      if(bs instanceof AtomicSymbol) {
942        built.add(bs);
943        return expandMatches(parent, symList, built);
944      } else {
945        Set syms = new HashSet();
946        Iterator i = ((FiniteAlphabet) bs.getMatches()).iterator();
947        while(i.hasNext()) {
948          List built2 = new ArrayList(built);
949          built2.add((AtomicSymbol) i.next());
950          syms.addAll(expandMatches(parent, symList, built2));
951        }
952        return syms;
953      }
954    } else {
955      try {
956        Symbol s = parent.getSymbol(built);
957        if(s instanceof AtomicSymbol) {
958          return Collections.singleton((AtomicSymbol) s);
959        } else {
960          Set syms = new HashSet();
961          for(Iterator i = ((FiniteAlphabet) s.getMatches()).iterator(); i.hasNext(); ) {
962            syms.add((AtomicSymbol) i.next());
963          }
964          return syms;
965        }
966      } catch (IllegalSymbolException ise) {
967        throw new BioError("Assertion Failure: Couldn't create symbol.", ise);
968      }
969    }
970  }
971
972  /**
973   * <p>
974   * Return a list of BasisSymbol instances that uniquely sum up all
975   * AtomicSymbol
976   * instances in symSet. If the symbol can't be represented by a single list of
977   * BasisSymbol instances, return null.
978   * </p>
979   * 
980   * <p>
981   * This method is most useful for implementers of Alphabet and Symbol. It
982   * probably should not be invoked by users.
983   * </p>
984   * @return a List of BasisSymbols
985   * @param symSet the Set of AtomicSymbol instances
986   * @param alpha the Alphabet instance that the Symbols are from
987   * @throws org.biojava.bio.symbol.IllegalSymbolException In practice it should not. If it does it probably
988   * indicates a subtle bug somewhere in AlphabetManager
989   */
990  public static List factorize(Alphabet alpha, Set symSet)
991  throws IllegalSymbolException {
992    List alphas = alpha.getAlphabets();
993    List facts = new ArrayList();
994    int size = symSet.size();
995    Set syms = new HashSet();
996    for(int col = 0; col < alphas.size(); col++) {
997      Alphabet a = (Alphabet) alphas.get(col);
998      for(Iterator i = symSet.iterator(); i.hasNext(); ) {
999        syms.add(
1000          (AtomicSymbol) ((AtomicSymbol)
1001          i.next()).getSymbols().get(col)
1002        );
1003      }
1004      int s = syms.size();
1005      if( (size % s) != 0 ) {
1006        return null;
1007      }
1008      size /= s;
1009      facts.add(a.getAmbiguity(syms));
1010      syms.clear();
1011    }
1012    if(size != 1) {
1013      return null;
1014    }
1015    return facts;
1016  }
1017
1018
1019
1020
1021    /**
1022     * Load additional Alphabets, defined in XML format, into the AlphabetManager's registry.
1023     * These can the be retrieved by calling <code>alphabetForName</code>.
1024     *
1025     * @param is an <code>InputSource</code> encapsulating the document to be parsed
1026     * @throws IOException if there is an error accessing the stream
1027     * @throws SAXException if there is an error while parsing the document
1028     * @throws BioException if a problem occurs when creating the new Alphabets.
1029     * @since 1.3
1030     */
1031
1032    public static void loadAlphabets(InputSource is)
1033        throws SAXException, IOException, BioException
1034    {
1035        try {
1036            SAXParserFactory spf = SAXParserFactory.newInstance();
1037            spf.setNamespaceAware(true);
1038            XMLReader parser = spf.newSAXParser().getXMLReader();
1039            parser.setContentHandler(new SAX2StAXAdaptor(new AlphabetManagerHandler()));
1040            parser.parse(is);
1041        } catch (ParserConfigurationException ex) {
1042            throw new BioException( "Unable to create XML parser", ex);
1043        }
1044    }
1045
1046    /**
1047     * StAX handler for the alphabetManager element
1048     */
1049
1050    private static class AlphabetManagerHandler extends StAXContentHandlerBase {
1051        public void startElement(String nsURI,
1052                                             String localName,
1053                                             String qName,
1054                                             Attributes attrs,
1055                                             DelegationManager dm)
1056             throws SAXException
1057         {
1058             if (localName.equals("alphabetManager")) {
1059                 // ignore
1060             } else if (localName.equals("symbol")) {
1061                 String name = attrs.getValue("name");
1062                 dm.delegate(new SymbolHandler(name));
1063             } else if (localName.equals("alphabet")) {
1064                 String name = attrs.getValue("name");
1065                 String parent = attrs.getValue("parent");
1066                 FiniteAlphabet parentAlpha = null;
1067                 if (parent != null && parent.length() > 0) {
1068                     parentAlpha = (FiniteAlphabet) nameToAlphabet.get(parent);
1069                 }
1070                 dm.delegate(new AlphabetHandler(name, parentAlpha));
1071             } else {
1072                 throw new SAXException(
1073                         "Unknown element in alphabetManager: " +
1074                         localName);
1075             }
1076         }
1077
1078         public void endElement(String nsURI,
1079                                String localName,
1080                                String qName,
1081                                StAXContentHandler delegate)
1082            throws SAXException
1083         {
1084             if (delegate instanceof SymbolHandler) {
1085                 SymbolHandler sh = (SymbolHandler) delegate;
1086                 //String name = sh.getName();
1087                 LifeScienceIdentifier lsid = sh.getLSID();
1088                 Symbol symbol = sh.getSymbol();
1089                 if (lsidToSymbol.containsKey(lsid)) {
1090                     throw new SAXException(
1091                     "There is already a top-level symbol named "
1092                     + lsid);
1093                 }
1094                 lsidToSymbol.put(lsid, symbol);
1095             } else if (delegate instanceof AlphabetHandler) {
1096                 AlphabetHandler ah = (AlphabetHandler) delegate;
1097                 String name = ah.getName();
1098                 FiniteAlphabet alpha = ah.getAlphabet();
1099                 registerAlphabet(name, alpha);
1100             }
1101         }
1102
1103         private class SymbolHandler extends StAXContentHandlerBase {
1104             private String name;
1105             private LifeScienceIdentifier lsid;
1106             private Symbol symbol;
1107             private Annotation annotation = new SmallAnnotation();
1108
1109             public SymbolHandler(String id) {
1110                try {
1111                  lsid = LifeScienceIdentifier.valueOf(id);
1112                  name = lsid.getObjectId();
1113                } catch (LifeScienceIdentifierParseException ex) {
1114                  throw new BioError("Malformed LSID - "+name, ex);
1115                }
1116             }
1117
1118             public void startElement(String nsURI,
1119                                                 String localName,
1120                                     String qName,
1121                                     Attributes attrs,
1122                                     DelegationManager dm)
1123                  throws SAXException
1124             {
1125                 if (localName.equals("symbol")) {
1126                     // ignore
1127                 } else if (localName.equals("description")) {
1128                     dm.delegate(new StringElementHandlerBase() {
1129                         protected void setStringValue(String s) {
1130                             try {
1131                                 annotation.setProperty("description", s);
1132                             } catch (ChangeVetoException ex) {
1133                                 throw new BioError( "Assertion failure: veto while modifying new Annotation", ex);
1134                             }
1135                         }
1136                     } );
1137                 } else {
1138                     throw new SAXException("Unknown element in symbol: " + localName);
1139                 }
1140             }
1141
1142             public void endTree() {
1143                 symbol = new WellKnownAtomicSymbol(
1144                    new FundamentalAtomicSymbol(
1145                        name,
1146                        annotation
1147                    ),
1148                    lsid
1149                  );
1150             }
1151
1152             Symbol getSymbol() {
1153                 return symbol;
1154             }
1155
1156             String getName() {
1157                 return name;
1158             }
1159
1160             LifeScienceIdentifier getLSID(){
1161               return lsid;
1162             }
1163         }
1164
1165         private class AlphabetHandler extends StAXContentHandlerBase {
1166             private String name;
1167             //private Map localSymbols;
1168             private WellKnownAlphabet alpha;
1169             private ImmutableWellKnownAlphabetWrapper alphaWrapper;
1170
1171             String getName() {
1172                 return name;
1173             }
1174
1175             FiniteAlphabet getAlphabet() {
1176                 return alphaWrapper;
1177             }
1178
1179             public void endTree() {
1180                 alpha.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN);
1181             }
1182
1183             public AlphabetHandler(String name, FiniteAlphabet parent) {
1184                 this.name = name;
1185                 //localSymbols = new OverlayMap(nameToSymbol);
1186                 alpha = new WellKnownAlphabet();
1187                 alpha.setName(name);
1188                 alphaWrapper = new ImmutableWellKnownAlphabetWrapper(alpha);
1189                 if (parent != null) {
1190                     for (Iterator i = parent.iterator(); i.hasNext(); ) {
1191                         WellKnownAtomicSymbol sym =
1192                             (WellKnownAtomicSymbol) i.next();
1193                         try {
1194                             alpha.addSymbol(sym);
1195                         } catch (Exception ex) {
1196                             throw new BioError(
1197                               "Couldn't initialize alphabet from parent", ex);
1198                         }
1199                         lsidToSymbol.put(sym.getIdentifier(), sym);
1200                     }
1201                 }
1202             }
1203
1204             public void startElement(String nsURI,
1205                                     String localName,
1206                                     String qName,
1207                                     Attributes attrs,
1208                                     DelegationManager dm)
1209                  throws SAXException
1210             {
1211                 if (localName.equals("alphabet")) {
1212                     // ignore
1213                 } else if (localName.equals("symbol")) {
1214                     String name = attrs.getValue("name");
1215                     dm.delegate(new SymbolHandler(name));
1216                 } else if (localName.equals("symbolref")) {
1217                     String name = attrs.getValue("name");
1218                    LifeScienceIdentifier lsid = null;
1219                    try {
1220                      lsid =
1221                          LifeScienceIdentifier.valueOf(name);
1222                    } catch (LifeScienceIdentifierParseException ex) {
1223                      throw new SAXException("Couldn't form a LSID from "+name);
1224                    }
1225                     Symbol sym = (Symbol) lsidToSymbol.get(lsid);
1226                     if (sym == null) {
1227                         throw new SAXException(
1228                           "Reference to non-existent symbol " + name);
1229                     }
1230                     addSymbol(sym);
1231                 } else if (localName.equals("characterTokenization")) {
1232                     String name = attrs.getValue("name");
1233                     boolean caseSensitive = "true".equals(attrs.getValue("caseSensitive"));
1234                     dm.delegate(new CharacterTokenizationHandler(name, alphaWrapper, lsidToSymbol, caseSensitive));
1235                 } else if (localName.equals("description")) {
1236                     dm.delegate(new StringElementHandlerBase() {
1237                         protected void setStringValue(String s) {
1238                             try {
1239                                 alpha.getAnnotation().setProperty("description", s);
1240                             } catch (ChangeVetoException ex) {
1241                                 throw new BioError( "Assertion failure: veto while modifying new Annotation", ex);
1242                             }
1243                         }
1244                     } );
1245                 } else {
1246                     throw new SAXException("Unknown element in alphabetl: " + localName);
1247                 }
1248             }
1249
1250             public void endElement(String nsURI,
1251                                                String localName,
1252                                    String qName,
1253                                    StAXContentHandler delegate)
1254                  throws SAXException
1255             {
1256                 if (delegate instanceof SymbolHandler) {
1257                     SymbolHandler sh = (SymbolHandler) delegate;
1258                     //String name = sh.getName();
1259                     Symbol symbol = sh.getSymbol();
1260                     LifeScienceIdentifier lsid = sh.getLSID();
1261                     lsidToSymbol.put(lsid, symbol);
1262                     addSymbol(symbol);
1263                 } else if (delegate instanceof CharacterTokenizationHandler) {
1264                     CharacterTokenizationHandler cth = (CharacterTokenizationHandler) delegate;
1265                     String name = cth.getName();
1266                     SymbolTokenization toke = cth.getTokenization();
1267                     alpha.putTokenization(name, toke);
1268                 }
1269             }
1270
1271             private void addSymbol(Symbol sym)
1272                 throws SAXException
1273             {
1274                 try {
1275                     alpha.addSymbol(sym);
1276                 } catch (ChangeVetoException cve) {
1277                     throw new BioError( "Assertion failure: veto while modifying new Alphabet", cve);
1278                 } catch (IllegalSymbolException ex) {
1279                     throw new SAXException("IllegalSymbolException adding symbol to alphabet");
1280                 }
1281             }
1282         }
1283
1284         private class CharacterTokenizationHandler extends StAXContentHandlerBase {
1285             private String name;
1286             private Map localSymbols;
1287             private SymbolTokenization toke;
1288             private boolean isAlternate;
1289
1290             String getName() {
1291                 return name;
1292             }
1293
1294             SymbolTokenization getTokenization() {
1295                 return toke;
1296             }
1297
1298             public CharacterTokenizationHandler(String name,
1299                                                 FiniteAlphabet alpha,
1300                                                 Map localSymbols,
1301                                                 boolean caseSensitive)
1302             {
1303
1304                 this.name = name;
1305                 this.localSymbols = new HashMap();
1306                 for (Iterator i = alpha.iterator(); i.hasNext(); ) {
1307                     WellKnownAtomicSymbol sym = (WellKnownAtomicSymbol) i.next();
1308                     this.localSymbols.put(sym.getIdentifier(), sym);
1309                 }
1310                 if(name.indexOf("alternate")==0) {
1311                     toke = new AlternateTokenization(alpha, caseSensitive);
1312                     isAlternate = true;
1313                 } else
1314                     toke = new CharacterTokenization(alpha, caseSensitive);
1315             }
1316
1317             public void startElement(String nsURI,
1318                                                 String localName,
1319                                     String qName,
1320                                     Attributes attrs,
1321                                     DelegationManager dm)
1322                  throws SAXException
1323             {
1324                 if (localName.equals("characterTokenization")) {
1325                     // ignore
1326                 } else if (localName.equals("atomicMapping")) {
1327                     dm.delegate(new MappingHandler(true));
1328                 } else if (localName.equals("ambiguityMapping")) {
1329                     dm.delegate(new MappingHandler(false));
1330                 } else if (localName.equals("gapSymbolMapping")) {
1331                     dm.delegate(new MappingHandler(false, true));
1332                 } else {
1333                     throw new SAXException("Unknown element in characterTokenization: " + localName);
1334                 }
1335             }
1336
1337             private class MappingHandler extends StAXContentHandlerBase {
1338                 public MappingHandler(boolean isAtomic, boolean isPureGap) {
1339                   this.isAtomic = isAtomic;
1340                   this.isPureGap = isPureGap;
1341                 }
1342
1343                 public MappingHandler(boolean isAtomic) {
1344                     this(isAtomic, false);
1345                 }
1346
1347                 boolean isAtomic;
1348                 boolean isPureGap;
1349                 Set symbols = new HashSet();
1350                 char c = '\0';
1351                 String str = "";
1352                 int level = 0;
1353
1354                 public void startElement(String nsURI,
1355                                          String localName,
1356                                          String qName,
1357                                          Attributes attrs,
1358                                          DelegationManager dm)
1359                     throws SAXException
1360                 {
1361                     if (level == 0) {
1362                         c = attrs.getValue("token").charAt(0);
1363                         if(isAlternate)
1364                             str = attrs.getValue("token");
1365                     } else {
1366                         if (localName.equals("symbolref")) {
1367                             String name = attrs.getValue("name");
1368                             LifeScienceIdentifier lsid = null;
1369                             try {
1370                               lsid = LifeScienceIdentifier.valueOf(name);
1371                             } catch (LifeScienceIdentifierParseException ex) {
1372                               throw new SAXException("Cannot for LSID from " + name);
1373                             }
1374                             Symbol sym = (Symbol) localSymbols.get(lsid);
1375                             if (sym == null) {
1376                                 throw new SAXException("Reference to non-existent symbol " + name);
1377                             }
1378                             symbols.add(sym);
1379                         } else {
1380                             throw new SAXException("Unknown element in mapping: " + localName);
1381                         }
1382                     }
1383                     ++level;
1384                 }
1385
1386                 public void endElement(String nsURI,
1387                                        String localName,
1388                                        String qName,
1389                                        StAXContentHandler delegate)
1390                     throws SAXException
1391                 {
1392                     --level;
1393                 }
1394
1395                 public void endTree()
1396                     throws SAXException
1397                 {
1398                     Symbol ambiSym;
1399                     if(isPureGap) {
1400                         ambiSym = getGapSymbol();
1401                     } else {
1402                         try {
1403                             ambiSym = toke.getAlphabet().getAmbiguity(symbols);
1404                         } catch (IllegalSymbolException ex) {
1405                             throw (SAXException)
1406                                     new SAXException("IllegalSymbolException binding mapping for " + c).initCause(ex);
1407                         }
1408                     }
1409                     if(isAlternate)
1410                        ((AlternateTokenization)toke).bindSymbol(ambiSym, str);
1411                     else
1412                        ((CharacterTokenization)toke).bindSymbol(ambiSym, c);
1413                 }
1414             }
1415         }
1416    }
1417
1418    private static class WellKnownTokenizationWrapper
1419        extends Unchangeable
1420        implements SymbolTokenization, Serializable
1421    {
1422        private String name;
1423        private Alphabet alphabet;
1424        private SymbolTokenization toke;
1425
1426        WellKnownTokenizationWrapper(Alphabet alpha, SymbolTokenization toke, String name) {
1427            super();
1428            this.alphabet = alpha;
1429            this.name = name;
1430            this.toke = toke;
1431        }
1432
1433        public Alphabet getAlphabet() {
1434            return alphabet;
1435        }
1436
1437        public TokenType getTokenType() {
1438            return toke.getTokenType();
1439        }
1440
1441        public StreamParser parseStream(SeqIOListener listener) {
1442            return toke.parseStream(listener);
1443        }
1444
1445        public Symbol parseToken(String s)
1446            throws IllegalSymbolException
1447        {
1448            return toke.parseToken(s);
1449        }
1450
1451        public String tokenizeSymbol(Symbol s)
1452            throws IllegalSymbolException
1453        {
1454            return toke.tokenizeSymbol(s);
1455        }
1456
1457        public String tokenizeSymbolList(SymbolList sl)
1458            throws IllegalAlphabetException, IllegalSymbolException
1459        {
1460            return toke.tokenizeSymbolList(sl);
1461        }
1462
1463        public Annotation getAnnotation() {
1464            return toke.getAnnotation();
1465        }
1466
1467        public Object writeReplace() {
1468            return new OPH(getAlphabet().getName(), name);
1469        }
1470
1471        private static class OPH implements Serializable {
1472            private String alphaName;
1473            private String name;
1474
1475            OPH(String alphaName, String name) {
1476                this.alphaName = alphaName;
1477                this.name = name;
1478            }
1479
1480            private Object readResolve() throws ObjectStreamException {
1481                try {
1482                    Alphabet alphabet = alphabetForName(alphaName);
1483                    return alphabet.getTokenization(name);
1484                } catch (Exception ex) {
1485                    throw new InvalidObjectException("Couldn't resolve tokenization " + name + " in alphabet " + alphaName);
1486                }
1487            }
1488        }
1489    }
1490
1491    /**
1492     * An alphabet contained WellKnownSymbols
1493     */
1494
1495    private static class WellKnownAlphabet
1496        extends SimpleAlphabet
1497    {
1498        public WellKnownAlphabet() {
1499            super();
1500        }
1501
1502        public WellKnownAlphabet(Set s) {
1503            super(s);
1504        }
1505
1506        protected Symbol getAmbiguityImpl(Set s)
1507            throws IllegalSymbolException
1508        {
1509            return getWellKnownAmbiguitySymbol(s);
1510        }
1511    }
1512
1513    /**
1514     * A wrapper which makes an Alphabet unchangable, and also fixes serialization
1515     */
1516
1517    private static class ImmutableWellKnownAlphabetWrapper
1518        extends Unchangeable
1519        implements FiniteAlphabet, Serializable
1520    {
1521        private FiniteAlphabet alpha;
1522        private Map tokenizationsByName = new HashMap();
1523
1524        public ImmutableWellKnownAlphabetWrapper(FiniteAlphabet alpha) {
1525            super();
1526            this.alpha = alpha;
1527        }
1528
1529        private Object writeReplace() {
1530            return new OPH(getName());
1531        }
1532
1533        public SymbolTokenization getTokenization(String name)
1534            throws BioException
1535        {
1536            SymbolTokenization toke = (SymbolTokenization) tokenizationsByName.get(name);
1537            if (toke == null) {
1538                if ("name".equals(name)) {
1539                    toke = new NameTokenization(this);
1540                } else {
1541                    toke = new WellKnownTokenizationWrapper(this, alpha.getTokenization(name), name);
1542                }
1543                tokenizationsByName.put(name, toke);
1544            }
1545            return toke;
1546        }
1547
1548        /**
1549         * Placeholder for a WellKnownAlphabet in a serialized
1550         * object stream.
1551         */
1552
1553         private static class OPH implements Serializable {
1554             private String name;
1555
1556             public OPH(String name) {
1557                 this.name = name;
1558             }
1559
1560             private Object readResolve() throws ObjectStreamException {
1561                 try {
1562                     Alphabet a = AlphabetManager.alphabetForName(name);
1563                     return a;
1564                 } catch (NoSuchElementException ex) {
1565                     throw new InvalidObjectException("Couldn't resolve alphabet " + name);
1566                 }
1567             }
1568         }
1569
1570        public boolean contains(Symbol s) {
1571            return alpha.contains(s);
1572        }
1573
1574        public List getAlphabets() {
1575            return Collections.singletonList(this);
1576        }
1577
1578        public Symbol getAmbiguity(Set s)
1579            throws IllegalSymbolException
1580        {
1581            return alpha.getAmbiguity(s);
1582        }
1583
1584        public Symbol getGapSymbol() {
1585            return alpha.getGapSymbol();
1586        }
1587
1588        public String getName() {
1589            return alpha.getName();
1590        }
1591
1592        public Symbol getSymbol(List l)
1593            throws IllegalSymbolException
1594        {
1595            return alpha.getSymbol(l);
1596        }
1597
1598        public void validate(Symbol s)
1599            throws IllegalSymbolException
1600        {
1601                alpha.validate(s);
1602        }
1603
1604        public void addSymbol(Symbol s)
1605            throws ChangeVetoException
1606        {
1607            throw new ChangeVetoException("Can't add symbols to Well Known Alphabets");
1608        }
1609
1610        public void removeSymbol(Symbol s)
1611            throws ChangeVetoException
1612        {
1613            throw new ChangeVetoException("Can't remove symbols from Well Known Alphabets");
1614        }
1615
1616        public Iterator iterator() {
1617            return  alpha.iterator();
1618        }
1619
1620        public int size() {
1621            return alpha.size();
1622        }
1623
1624        public Annotation getAnnotation() {
1625            return alpha.getAnnotation();
1626        }
1627    }
1628
1629    
1630    /**
1631     * A well-known gap. Resolved in serialized data
1632     */
1633    private static class WellKnownGapSymbol extends AbstractSimpleBasisSymbol implements Serializable{
1634        private SizeQueen sq;
1635        public WellKnownGapSymbol(List symList, SizeQueen sq) throws IllegalSymbolException{
1636            super(Annotation.EMPTY_ANNOTATION,
1637            symList,
1638            Alphabet.EMPTY_ALPHABET);
1639            this.sq = sq;
1640        }
1641        
1642        private Object readResolve() throws ObjectStreamException{
1643            //System.out.println("ping!!");
1644            return AlphabetManager.getGapSymbol(sq.getAlphas());
1645        }
1646    }
1647    /**
1648     * A well-known symbol.  Replaced by a placeholder in
1649     * serialized data.
1650     */
1651
1652    private static class WellKnownAtomicSymbol
1653        extends WellKnownBasisSymbol
1654        implements AtomicSymbol, Identifiable {
1655
1656        LifeScienceIdentifier lsid;
1657
1658        WellKnownAtomicSymbol(AtomicSymbol symbol, LifeScienceIdentifier lsid) {
1659            super(symbol);
1660            this.lsid = lsid;
1661        }
1662
1663        public LifeScienceIdentifier getIdentifier(){
1664          return lsid;
1665        }
1666
1667        public Alphabet getMatches() {
1668            return new SingletonAlphabet(this);
1669        }
1670
1671        private Object writeReplace() {
1672            return new WellKnownAtomicSymbol.OPH(getIdentifier());
1673        }
1674
1675        /**
1676         * Object Place Holder
1677         */
1678        private static class OPH implements Serializable {
1679            private LifeScienceIdentifier name;
1680
1681            public OPH(LifeScienceIdentifier name) {
1682                this.name = name;
1683            }
1684
1685            private Object readResolve() throws ObjectStreamException {
1686                try {
1687                    return symbolForLifeScienceID(name);
1688                } catch (NoSuchElementException ex) {
1689                    throw new InvalidObjectException(
1690                        "Couldn't resolve symbol:" + name
1691                    );
1692                }
1693            }
1694        }
1695    }
1696
1697    private static class WellKnownBasisSymbol
1698            extends Unchangeable
1699            implements BasisSymbol, Serializable
1700    {
1701        protected BasisSymbol symbol;
1702        private Set matches;
1703
1704        WellKnownBasisSymbol(BasisSymbol symbol) {
1705            super();
1706            symbol.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN); // Immutable
1707            this.symbol = symbol;
1708            this.matches = new HashSet();
1709            for (Iterator i = ((FiniteAlphabet) symbol.getMatches()).iterator(); i.hasNext(); ) {
1710                matches.add(i.next());
1711            }
1712        }
1713
1714        Symbol getSymbol() {
1715            return symbol;
1716        }
1717
1718        public int hashCode() {
1719            return symbol.hashCode();
1720        }
1721
1722        public boolean equals(Object o) {
1723            if (o instanceof WellKnownBasisSymbol) {
1724                return symbol.equals(((WellKnownBasisSymbol) o).getSymbol());
1725            } else {
1726                return false;
1727            }
1728        }
1729
1730        public String getName() {
1731            return symbol.getName();
1732        }
1733
1734        public Alphabet getMatches() {
1735            return symbol.getMatches();
1736        }
1737
1738        public List getSymbols() {
1739            return Collections.singletonList(this);
1740        }
1741
1742        public Annotation getAnnotation() {
1743            return symbol.getAnnotation();
1744        }
1745
1746        private Object writeReplace() {
1747            return new OPH(matches);
1748        }
1749
1750
1751        private static class OPH implements Serializable {
1752            private Set matches;
1753
1754            public OPH(Set matches) {
1755                OPH.this.matches = matches;
1756            }
1757
1758            private Object readResolve() /* throws ObjectStreamException */ {
1759                return getWellKnownAmbiguitySymbol(matches);
1760            }
1761        }
1762    }
1763
1764  /**
1765   * <p>
1766   * The class representing the Gap symbol.
1767   * </p>
1768   *
1769   * <p>
1770   * The gap is quite special. It is an ambiguity symbol with an empty alphabet.
1771   * This means that it notionaly represents an unfilled slot in a sequence.
1772   * It should be a singleton, hence the
1773   * placement in AlphabetManager and also the method normalize.
1774   * </p>
1775   *
1776   * @author Matthew Pocock
1777   */
1778  private static class GapSymbol
1779    extends
1780      Unchangeable
1781    implements
1782      Symbol,
1783      Serializable
1784  {
1785      public GapSymbol() {
1786      }
1787
1788      public String getName() {
1789          return "gap";
1790      }
1791
1792      public char getToken() {
1793          return '-';
1794      }
1795
1796      public Annotation getAnnotation() {
1797          return Annotation.EMPTY_ANNOTATION;
1798      }
1799
1800      public Alphabet getMatches() {
1801          return Alphabet.EMPTY_ALPHABET;
1802      }
1803      
1804      
1805       private Object readResolve() throws ObjectStreamException {
1806           return AlphabetManager.getGapSymbol();
1807       }
1808  }
1809  
1810
1811  /**
1812   * Get an indexer for a specified alphabet.
1813   *
1814   * @param alpha The alphabet to index
1815   * @return an AlphabetIndex instance
1816   * @since 1.1
1817   */
1818
1819  /**
1820   * Get an indexer for a specified alphabet.
1821   *
1822   * @param alpha The alphabet to index
1823   * @return an AlphabetIndex instance
1824   * @since 1.1
1825   */
1826  public static AlphabetIndex getAlphabetIndex(
1827    FiniteAlphabet alpha
1828  ) {
1829    final int generateIndexSize = 160;
1830    AlphabetIndex ai = (AlphabetIndex) alphabetToIndex.get(alpha);
1831    if(ai == null) {
1832      int size = alpha.size();
1833      if(size <= generateIndexSize) {
1834        ai = new LinearAlphabetIndex(alpha);
1835      } else {
1836        if(alpha.getAlphabets().size() > 1) {
1837          ai = new CrossProductAlphabetIndex(alpha);
1838        } else {
1839          ai = new HashedAlphabetIndex(alpha);
1840        }
1841      }
1842      alphabetToIndex.put(alpha, ai);
1843    }
1844    return ai;
1845  }
1846
1847  /**
1848   * Get an indexer for an array of symbols.
1849   *
1850   * @param syms the Symbols to index in that order
1851   * @return an AlphabetIndex instance
1852   * @since 1.1
1853   */
1854  public static AlphabetIndex getAlphabetIndex (
1855    Symbol[] syms
1856  ) throws IllegalSymbolException, BioException {
1857    return new LinearAlphabetIndex(syms);
1858  }
1859
1860  private static final class SizeQueen extends AbstractList implements Serializable{
1861    private final List alphas;
1862
1863    public SizeQueen(List alphas) {
1864      this.alphas = alphas;
1865    }
1866
1867    public int size() {
1868      return alphas.size();
1869    }
1870    
1871    public List getAlphas(){
1872        return this.alphas;
1873    }
1874
1875    public Object get(int pos) {
1876      Alphabet a = (Alphabet) alphas.get(pos);
1877      List al = a.getAlphabets();
1878      int size = al.size();
1879      if(size > 1) {
1880        return new SizeQueen(al);
1881      } else {
1882        return new Integer(size);
1883      }
1884    }
1885  }
1886
1887  private static Symbol getWellKnownAmbiguitySymbol(Set s) {
1888      Symbol sym = (Symbol) ambiguitySymbols.get(s);
1889      if (sym == null) {
1890          SimpleAlphabet matchAlpha = new WellKnownAlphabet(s);
1891          sym = new WellKnownBasisSymbol(new SimpleBasisSymbol(Annotation.EMPTY_ANNOTATION, matchAlpha));
1892          ambiguitySymbols.put(new HashSet(s), sym);
1893      }
1894      return sym;
1895  }
1896}