001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.symbol;
024
025import java.io.NotSerializableException;
026import java.io.ObjectStreamException;
027import java.io.Serializable;
028import java.util.Collections;
029import java.util.Iterator;
030import java.util.List;
031import java.util.NoSuchElementException;
032import java.util.Set;
033
034import org.biojava.bio.Annotation;
035import org.biojava.bio.BioError;
036import org.biojava.bio.seq.io.IntegerTokenization;
037import org.biojava.bio.seq.io.SubIntegerTokenization;
038import org.biojava.bio.seq.io.SymbolTokenization;
039import org.biojava.utils.ChangeVetoException;
040import org.biojava.utils.SingletonList;
041import org.biojava.utils.StaticMemberPlaceHolder;
042import org.biojava.utils.Unchangeable;
043import org.biojava.utils.cache.WeakValueHashMap;
044
045/**
046 * <p>
047 * An efficient implementation of an Alphabet over the infinite set of integer
048 * values.
049 * </p>
050 *
051 * <p>
052 * This class can be used to represent lists of integer numbers as a
053 * SymbolList with the alphabet IntegerAlphabet. These lists can then be
054 * annotated with features, or fed into dynamic-programming algorithms, or
055 * processed as per any other SymbolList object.
056 * </p>
057 *
058 * <p>
059 * Object identity should be used to decide if two IntegerSymbol objects are
060 * the same. IntegerAlphabet ensures that all IntegerSymbol instances are
061 * canonicalized.
062 * </p>
063 *
064 * @author Matthew Pocock
065 * @author Mark Schreiber
066 * @author Thomas Down
067 */
068
069public final class IntegerAlphabet
070  extends
071    Unchangeable
072  implements
073    Alphabet,
074    Serializable
075{
076  /**
077   * The singleton instance of the IntegerAlphabet class.
078   */
079  public static IntegerAlphabet INSTANCE;
080
081  private Object writeReplace() throws ObjectStreamException {
082    try {
083      return new StaticMemberPlaceHolder(IntegerAlphabet.class.getField("INSTANCE"));
084    } catch (NoSuchFieldException nsfe) {
085      throw new NotSerializableException(nsfe.getMessage());
086    }
087  }
088
089  /**
090   * Construct a finite contiguous subset of the <code>IntegerAlphabet</code>.
091   * Useful for making CrossProductAlphabets with other <code>FiniteAlphabet</code>s.
092   *
093   * @param min the lower bound of the Alphabet
094   * @param max the upper bound of the Alphabet
095   * @throws IllegalArgumentException if max < min
096   * @return A FiniteAlphabet from min to max <b>inclusive</b>.
097   */
098  public static SubIntegerAlphabet getSubAlphabet(int min, int max)
099  throws IllegalArgumentException {
100    String name = "SUBINTEGER["+min+".."+max+"]";
101    if(AlphabetManager.registered(name)){
102      return (SubIntegerAlphabet) (AlphabetManager.alphabetForName(name));
103    }
104          
105    FiniteAlphabet a = new SubIntegerAlphabet(min, max);
106    AlphabetManager.registerAlphabet(a.getName(),a);
107  
108    return (SubIntegerAlphabet) (AlphabetManager.alphabetForName(name));
109  }
110
111  /**
112   * Retrieve a SymbolList view of an array of integers.
113   * <p>
114   * The returned object is a view onto the underlying array, and does not copy
115   * it. Changes made to the original array will alter the symulting SymbolList.
116   *
117   * @param iArray  the array of integers to view
118   * @return a SymbolList over the IntegerAlphabet that represent the values in
119   *         iArray
120   */
121  public static SymbolList fromArray(int [] iArray) {
122    return new IntegerArray(iArray);
123  }
124
125  /**
126   * Retrieve the single IntegerAlphabet instance.
127   *
128   * @return the singleton IntegerAlphabet instance
129   */
130  public static IntegerAlphabet getInstance() {
131    if(INSTANCE == null) {
132      INSTANCE = new IntegerAlphabet();
133      //add an alias
134      AlphabetManager.registerAlphabet("Alphabet of all integers.", INSTANCE);
135    }
136
137    return INSTANCE;
138  }
139
140  /**
141   * Canonicalization map for ints and references to symbols.
142   */
143    private WeakValueHashMap intToSym;
144
145  private IntegerAlphabet() {
146      intToSym = new WeakValueHashMap();
147  }
148
149  /**
150   * Retrieve the Symbol for an int.
151   *
152   * @param val  the int to view
153   * @return a IntegerSymbol embodying val
154   */
155
156  public synchronized IntegerSymbol getSymbol(int val) {
157      Integer i = new Integer(val);
158      IntegerSymbol sym = (IntegerSymbol) intToSym.get(i);
159      if(sym == null) {
160          sym = new IntegerSymbol(val);
161          intToSym.put(i, sym);
162      }
163      return sym;
164  }
165
166  public Symbol getGapSymbol() {
167    return AlphabetManager.getGapSymbol(getAlphabets());
168  }
169
170  public Annotation getAnnotation() {
171    return Annotation.EMPTY_ANNOTATION;
172  }
173
174  public List getAlphabets() {
175    return new SingletonList(this);
176  }
177
178  public Symbol getSymbol(List symList)
179  throws IllegalSymbolException {
180    throw new BioError("Unimplemneted method");
181  }
182
183  public Symbol getAmbiguity(Set symSet)
184  throws IllegalSymbolException {
185    throw new BioError("Unimplemneted method");
186  }
187
188  public boolean contains(Symbol s) {
189    if(s instanceof IntegerSymbol) {
190      return true;
191    } else {
192      return false;
193    }
194  }
195
196  public void validate(Symbol s) throws IllegalSymbolException {
197    if(!contains(s)) {
198      throw new IllegalSymbolException(
199        "Only symbols of type IntegerAlphabet.IntegerSymbol are valid for this alphabet.\n" +
200        "(" + s.getClass() + ") " + s.getName()
201      );
202    }
203  }
204
205  public String getName() {
206    return "INTEGER";
207  }
208
209  /**
210   * Creates a new parser (Mark Schreiber 3 May 2001).
211   *
212   * @param name Currently only "token" is supported. You may also
213   * use "default" as a synonym of "token"
214   * @return an IntegerParser.
215   */
216  public SymbolTokenization getTokenization(String name) {
217    if(name.equals("token") || name.equals("default")){
218      return new IntegerTokenization();
219    }else{
220      throw new NoSuchElementException(name + " parser not supported by IntegerAlphabet yet");
221    }
222  }
223
224  /**
225   * A single int value.
226   * <p>
227   * @author Matthew Pocock
228   */
229  public static class IntegerSymbol
230    extends
231      Unchangeable
232    implements
233      AtomicSymbol,
234      Serializable
235  {
236    private final int val;
237    private final Alphabet matches;
238
239    public Annotation getAnnotation() {
240      return Annotation.EMPTY_ANNOTATION;
241    }
242
243    public String getName() {
244      return val + "";
245    }
246
247    public int intValue() {
248      return val;
249    }
250
251    public Alphabet getMatches() {
252      return matches;
253    }
254
255    public List getSymbols() {
256      return new SingletonList(this);
257    }
258
259    public Set getBases() {
260      return Collections.singleton(this);
261    }
262
263    protected IntegerSymbol(int val) {
264      this.val = val;
265      this.matches = new SingletonAlphabet(this);
266    }
267
268    public int hashCode(){
269      int result = 17;
270      result = 37*result+intValue();
271      return result;
272    }
273
274    public boolean equals(Object o){
275      if(o == this) return true;
276      if(o instanceof IntegerSymbol){
277        IntegerSymbol i = (IntegerSymbol) o;
278        if (i.intValue() == this.intValue()) {
279          return true;
280        }
281      }
282      return false;
283    }
284  }
285
286  /**
287   * A light-weight implementation of SymbolList that allows an array to
288   * appear to be a SymbolList.
289   *
290   * @author Matthew Pocock
291   */
292  private static class IntegerArray
293  extends AbstractSymbolList implements Serializable {
294    private final int [] iArray;
295
296    public Alphabet getAlphabet() {
297      return INSTANCE;
298    }
299
300    public Symbol symbolAt(int i) {
301      return new IntegerSymbol(iArray[i-1]);
302    }
303
304    public int length() {
305      return iArray.length;
306    }
307
308    public IntegerArray(int [] iArray) {
309      this.iArray = iArray;
310    }
311  }
312
313  /**
314   * A class to represent a finite contiguous subset of the infinite IntegerAlphabet
315   *
316   * @author Mark Schreiber
317   * @author Matthew Pocock
318   * @since 1.3
319   */
320  public static class SubIntegerAlphabet
321  extends AbstractAlphabet {
322    private int min;
323    private int max;
324    private String name; // cache this for performance
325
326    /**
327     * Construct a contiguous sub alphabet with the integers from min to max inclusive.
328     */
329    private SubIntegerAlphabet(int min, int max) throws IllegalArgumentException{
330      if(max < min) {
331        throw new IllegalArgumentException(
332          "min must be less than max: " +
333          min + " : " + max
334        );
335      }
336
337      this.min = min;
338      this.max = max;
339
340      this.name = "SUBINTEGER["+min+".."+max+"]";
341    }
342
343    public String getName() {
344      return name;
345    }
346
347    protected boolean containsImpl(AtomicSymbol sym) {
348      if(!IntegerAlphabet.getInstance().contains(sym)) {
349        return false;
350      }
351
352      IntegerSymbol is = (IntegerSymbol) sym;
353      return is.intValue() >= min && is.intValue() <= max;
354    }
355
356    /**
357     * @param name Currently only "token" is supported.
358     * @return an IntegerParser.
359     */
360    public SymbolTokenization getTokenization(String name) {
361      if(name.equals("token") || name.equals("default")){
362        return new SubIntegerTokenization(this);
363      }else{
364        throw new NoSuchElementException(name + " parser not supported by IntegerAlphabet yet");
365      }
366    }
367
368    public IntegerSymbol getSymbol(int val)
369    throws IllegalSymbolException {
370      if(val < min || val > max) {
371        throw new IllegalSymbolException(
372          "Could not get Symbol for value " +
373          val + " as it is not in the range " +
374          min + " : " + max
375        );
376      }
377
378      return IntegerAlphabet.getInstance().getSymbol(val);
379    }
380
381    public int size() {
382      return max - min + 1;
383    }
384
385    public List getAlphabets() {
386      return new SingletonList(this);
387    }
388
389
390    protected AtomicSymbol getSymbolImpl(List symL) throws
391        IllegalSymbolException {
392
393      if (symL.size() != 1) {
394        throw new IllegalSymbolException(
395            "SubIntegerAlphabet is one-dimensional: " + this.getName() +
396            " : " + symL);
397      }
398
399      AtomicSymbol s = (AtomicSymbol) symL.get(0);
400      this.validate(s);
401      return s;
402    }
403
404    protected void addSymbolImpl(AtomicSymbol sym)
405    throws ChangeVetoException {
406      throw new ChangeVetoException(
407        "Can't add symbols to immutable alphabet " +
408        getName()
409      );
410    }
411
412    public void removeSymbol(Symbol sym)
413    throws ChangeVetoException {
414      throw new ChangeVetoException(
415        "Can't remove symbols from immutable alphabet " +
416        getName()
417      );
418    }
419
420    public Iterator iterator() {
421      return new Iterator() {
422        int indx = min;
423
424        public boolean hasNext() {
425          return indx <= max;
426        }
427
428        public Object next() {
429          try {
430            Symbol sym = getSymbol(indx);
431            indx++;
432            return sym;
433          } catch (IllegalSymbolException ise) {
434            throw new BioError(
435              "Assertion Failure: symbol " + indx +
436              " produced by iterator but not found in " + getName()
437              ,ise
438            );
439          }
440        }
441
442        public void remove() {
443          throw new UnsupportedOperationException();
444        }
445      };
446    }
447
448    public Annotation getAnnotation() {
449      return Annotation.EMPTY_ANNOTATION;
450    }
451  }
452}