001package org.biojava.bio.symbol;
002
003import java.io.IOException;
004import java.io.Serializable;
005
006import org.biojava.utils.ChangeListener;
007
008/**
009 * SymbolList implementation using constant-size chunks. Each chunk provides
010 * the same number of symbols (except the last one, which may be shorter). When
011 * a request for symbols comes in, firstly the apropreate chunk is located, and
012 * then the symbols are extracted. This implementation is used in the IO package
013 * to avoid allocating and re-allocating memory when the total length of the
014 * symbol list is unknown. It can also be used when chunks are to be lazily
015 * fetched from some high-latency stoorage by implementing a single lazy-fetch
016 * SymbolList class and populating a ChunkedSymbolList with a complete
017 * tile-path of them.
018 *
019 * @author David Huen
020 * @author Matthew Pocock
021 */
022public class ChunkedSymbolList
023        extends AbstractSymbolList
024        implements Serializable
025{
026  // state
027  private SymbolList [] chunks;
028  private final int chunkSize;
029  private final Alphabet alpha;
030  private final int length;
031
032  // cached info for speedups
033  private transient int currentMin = Integer.MAX_VALUE;
034  private transient int currentMax = Integer.MIN_VALUE;
035  private transient SymbolList currentChunk = null;
036
037  private void readObject(java.io.ObjectInputStream stream)
038          throws IOException, ClassNotFoundException
039  {
040    stream.defaultReadObject();
041
042    currentMin = Integer.MAX_VALUE;
043    currentMax = Integer.MIN_VALUE;
044    currentChunk = null;
045  }
046
047  protected void finalize() throws Throwable {
048    super.finalize();
049    alpha.removeChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
050  }
051
052  public ChunkedSymbolList(SymbolList [] chunks,
053                           int chunkSize,
054                           int length,
055                           Alphabet alpha)
056  {
057    this.chunks = chunks;
058    this.chunkSize = chunkSize;
059    this.length = length;
060    this.alpha = alpha;
061    alpha.addChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
062  }
063
064  public Alphabet getAlphabet() {
065    return alpha;
066  }
067
068  public int length() {
069    return length;
070  }
071
072  public synchronized Symbol symbolAt(int pos) {
073    int offset;
074    --pos;
075
076    if ((pos < currentMin) || (pos > currentMax)) {
077      // bad - we are outside the current chunk
078      int chnk = pos / chunkSize;
079      offset =  pos % chunkSize;
080
081      currentMin = pos - offset;
082      currentMax = currentMin + chunkSize - 1;
083      currentChunk = chunks[chnk];
084    }
085    else {
086      offset = pos - currentMin;
087    }
088
089    return currentChunk.symbolAt(offset + 1);
090  }
091
092  public SymbolList subList(int start, int end) {
093    if (start < 1 || end > length()) {
094      throw new IndexOutOfBoundsException(
095              "Sublist index out of bounds " + length() + ":" + start + "," + end
096      );
097    }
098
099    if (end < start) {
100      throw new IllegalArgumentException(
101              "end must not be lower than start: start=" + start + ", end=" + end
102      );
103    }
104
105    //
106    // Mildly optimized for case where from and to are within
107    // the same chunk.
108    //
109
110    int afrom = start - 1;
111    int ato = end - 1;
112    int cfrom = afrom / chunkSize;
113    if (ato / chunkSize == cfrom) {
114      return chunks[cfrom].subList((afrom % chunkSize) + 1, (ato % chunkSize) + 1);
115    } else {
116      return super.subList(start, end);
117    }
118  }
119}