001package org.biojava.bio.symbol;
002
003import java.io.IOException;
004import java.io.Serializable;
005import org.biojava.bio.seq.io.ChunkedSymbolListFactory;
006import org.biojava.utils.ChangeEvent;
007
008import org.biojava.utils.ChangeListener;
009import org.biojava.utils.ChangeSupport;
010import org.biojava.utils.ChangeVetoException;
011
012/**
013 * SymbolList implementation using constant-size chunks. Each chunk provides
014 * the same number of symbols (except the last one, which may be shorter). When
015 * a request for symbols comes in, firstly the apropreate chunk is located, and
016 * then the symbols are extracted. This implementation is used in the IO package
017 * to avoid allocating and re-allocating memory when the total length of the
018 * symbol list is unknown. It can also be used when chunks are to be lazily
019 * fetched from some high-latency stoorage by implementing a single lazy-fetch
020 * SymbolList class and populating a ChunkedSymbolList with a complete
021 * tile-path of them.
022 *
023 * @author David Huen
024 * @author Matthew Pocock
025 * @author George Waldon
026 */
027public class ChunkedSymbolList
028        extends AbstractSymbolList
029        implements Serializable
030{
031  // state
032  private SymbolList [] chunks;
033  private final int chunkSize;
034  private final Alphabet alpha;
035  private int length;
036
037  // cached info for speedups
038  private transient int currentMin = Integer.MAX_VALUE;
039  private transient int currentMax = Integer.MIN_VALUE;
040  private transient SymbolList currentChunk = null;
041
042  private void readObject(java.io.ObjectInputStream stream)
043          throws IOException, ClassNotFoundException
044  {
045    stream.defaultReadObject();
046
047    currentMin = Integer.MAX_VALUE;
048    currentMax = Integer.MIN_VALUE;
049    currentChunk = null;
050  }
051
052  protected void finalize() throws Throwable {
053    super.finalize();
054    alpha.removeChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
055  }
056
057  public ChunkedSymbolList(SymbolList [] chunks,
058                           int chunkSize,
059                           int length,
060                           Alphabet alpha)
061  {
062    this.chunks = chunks;
063    this.chunkSize = chunkSize;
064    this.length = length;
065    this.alpha = alpha;
066    alpha.addChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
067  }
068
069  public Alphabet getAlphabet() {
070    return alpha;
071  }
072
073  public int length() {
074    return length;
075  }
076
077  public synchronized Symbol symbolAt(int pos) {
078    int offset;
079    --pos;
080
081    if ((pos < currentMin) || (pos > currentMax)) {
082      // bad - we are outside the current chunk
083      int chnk = pos / chunkSize;
084      offset =  pos % chunkSize;
085
086      currentMin = pos - offset;
087      currentMax = currentMin + chunkSize - 1;
088      currentChunk = chunks[chnk];
089    }
090    else {
091      offset = pos - currentMin;
092    }
093
094    return currentChunk.symbolAt(offset + 1);
095  }
096
097  public SymbolList subList(int start, int end) {
098    if (start < 1 || end > length()) {
099      throw new IndexOutOfBoundsException(
100              "Sublist index out of bounds " + length() + ":" + start + "," + end
101      );
102    }
103
104    if (end < start) {
105      throw new IllegalArgumentException(
106              "end must not be lower than start: start=" + start + ", end=" + end
107      );
108    }
109
110    //
111    // Mildly optimized for case where from and to are within
112    // the same chunk.
113    //
114
115    int afrom = start - 1;
116    int ato = end - 1;
117    int cfrom = afrom / chunkSize;
118    if (ato / chunkSize == cfrom) {
119      return chunks[cfrom].subList((afrom % chunkSize) + 1, (ato % chunkSize) + 1);
120    } else {
121      return super.subList(start, end);
122    }
123  }
124  
125    public void edit(Edit edit) throws IllegalAlphabetException,
126            ChangeVetoException {
127        ChangeSupport cs;
128        ChangeEvent cevt;
129        Symbol[] dest;
130
131        // first make sure that it is in bounds
132        if ((edit.pos + edit.length > length +1 ) || (edit.pos <= 0) || edit.length < 0){
133            throw new IndexOutOfBoundsException();
134        }
135        // make sure that the symbolList is of the correct alphabet
136        if (( edit.replacement.getAlphabet() != getAlphabet()) &&  (edit.replacement != SymbolList.EMPTY_LIST)){
137            throw new IllegalAlphabetException();
138        }
139
140        // give the listeners a change to veto this
141         // create a new change event ->the EDIT is a static final variable of type ChangeType in SymbolList interface
142        cevt = new ChangeEvent(this, SymbolList.EDIT, edit);
143        cs = getChangeSupport(SymbolList.EDIT);
144        synchronized(cs) {
145            // let the listeners know what we want to do
146            cs.firePreChangeEvent(cevt);
147        
148            // now for the edit
149            int posRightFragInSourceArray = edit.pos + edit.length - 1;
150            int rightFragLength = length - posRightFragInSourceArray;
151            int posRightFragInDestArray = posRightFragInSourceArray + edit.replacement.length() - edit.length;
152            int posReplaceFragInDestArray = edit.pos - 1;
153            int replaceFragLength = edit.replacement.length();
154            int newLength = length + replaceFragLength - edit.length;
155
156            // extend the array
157            dest = new Symbol[newLength];
158            
159            // copy symbols before the edit and make sure we are not editing the edit at the same time (hoops!)
160            int leftFragLength = edit.pos - 1;
161            for (int i = 0; i < chunks.length; i++) {
162                SymbolList chunk = chunks[i];
163                Symbol[] chunkSymbol = ((SimpleSymbolList) chunk).getSymbolArray();
164                int desStart = i * chunkSize;
165                int desEnd = i * chunkSize + chunk.length();
166                if (desEnd >= leftFragLength) {
167                    desEnd = leftFragLength;
168                    System.arraycopy(chunkSymbol, 0, dest, desStart, desEnd - desStart);
169                    break;
170                }
171                System.arraycopy(chunkSymbol, 0, dest, desStart, desEnd - desStart);
172            }
173            
174            // copy the symbols after the edit
175            if (rightFragLength > 0) {
176                int chunkNum = posRightFragInSourceArray / chunkSize;
177                int chunkStart = posRightFragInSourceArray % chunkSize;
178                int chunkCopied = 0;
179                for (int i = chunkNum; i < chunks.length; i++) {
180                    SymbolList chunk = chunks[i];
181                    Symbol[] chunkSymbol = ((SimpleSymbolList) chunk).getSymbolArray();
182                    int desStart = posRightFragInDestArray + chunkCopied;
183                    int desLength = chunk.length() - chunkStart;
184                    int left = rightFragLength - chunkCopied;
185                    if (left == 0) {
186                        break;
187                    }
188                    if (desLength > left) {
189                        desLength = left;
190                    }
191                    System.arraycopy(chunkSymbol, chunkStart, dest, desStart, desLength);
192                    if (chunkStart != 0) {
193                        chunkStart = 0;
194                    }
195                    chunkCopied += desLength;
196                }
197            }
198
199            // copy the symbols within the edit
200            for (int i = 1; i <= replaceFragLength; i++) {
201                dest[posReplaceFragInDestArray + i - 1] = edit.replacement.symbolAt(i);
202            }
203
204            //Make the chunks
205            int chunkNum = newLength/chunkSize;
206            int chunkLeft = newLength%chunkSize;
207            int chunkReused = (edit.pos-1)/chunkSize;
208            if(chunkLeft>0) chunkNum++;
209            SymbolList [] symListArray = new SymbolList[chunkNum];
210            for(int i=0;i<chunkNum-1;i++) {
211                if(i<chunkReused) {
212                    symListArray[i] = chunks[i];
213                    continue;
214                }
215                Symbol[] chunk = new Symbol[chunkSize];
216                System.arraycopy(dest, i*chunkSize, chunk, 0, chunkSize);
217                symListArray[i] = new SimpleSymbolList(chunk, chunkSize, getAlphabet());
218            }
219            if(chunkLeft>0) {
220                Symbol[] chunk = new Symbol[chunkLeft];
221                System.arraycopy(dest, (chunkNum-1)*chunkSize, chunk, 0, chunkLeft);
222                symListArray[chunkNum-1] = new SimpleSymbolList(chunk, chunkLeft, getAlphabet());
223            } else {
224                Symbol[] chunk = new Symbol[chunkSize];
225                System.arraycopy(dest, (chunkNum-1)*chunkSize, chunk, 0, chunkSize);
226                symListArray[chunkNum-1] = new SimpleSymbolList(chunk, chunkSize, getAlphabet());
227            }
228
229            chunks = symListArray;
230            length = newLength;
231            currentMin = Integer.MAX_VALUE;
232            currentMax = Integer.MIN_VALUE;
233            currentChunk = null;
234
235            cs.firePostChangeEvent(cevt);
236        }
237    }
238}