001package org.biojava.bio.symbol; 002 003import java.io.IOException; 004import java.io.Serializable; 005import org.biojava.bio.seq.io.ChunkedSymbolListFactory; 006import org.biojava.utils.ChangeEvent; 007 008import org.biojava.utils.ChangeListener; 009import org.biojava.utils.ChangeSupport; 010import org.biojava.utils.ChangeVetoException; 011 012/** 013 * SymbolList implementation using constant-size chunks. Each chunk provides 014 * the same number of symbols (except the last one, which may be shorter). When 015 * a request for symbols comes in, firstly the apropreate chunk is located, and 016 * then the symbols are extracted. This implementation is used in the IO package 017 * to avoid allocating and re-allocating memory when the total length of the 018 * symbol list is unknown. It can also be used when chunks are to be lazily 019 * fetched from some high-latency stoorage by implementing a single lazy-fetch 020 * SymbolList class and populating a ChunkedSymbolList with a complete 021 * tile-path of them. 022 * 023 * @author David Huen 024 * @author Matthew Pocock 025 * @author George Waldon 026 */ 027public class ChunkedSymbolList 028 extends AbstractSymbolList 029 implements Serializable 030{ 031 // state 032 private SymbolList [] chunks; 033 private final int chunkSize; 034 private final Alphabet alpha; 035 private int length; 036 037 // cached info for speedups 038 private transient int currentMin = Integer.MAX_VALUE; 039 private transient int currentMax = Integer.MIN_VALUE; 040 private transient SymbolList currentChunk = null; 041 042 private void readObject(java.io.ObjectInputStream stream) 043 throws IOException, ClassNotFoundException 044 { 045 stream.defaultReadObject(); 046 047 currentMin = Integer.MAX_VALUE; 048 currentMax = Integer.MIN_VALUE; 049 currentChunk = null; 050 } 051 052 protected void finalize() throws Throwable { 053 super.finalize(); 054 alpha.removeChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS); 055 } 056 057 public ChunkedSymbolList(SymbolList [] chunks, 058 int chunkSize, 059 int length, 060 Alphabet alpha) 061 { 062 this.chunks = chunks; 063 this.chunkSize = chunkSize; 064 this.length = length; 065 this.alpha = alpha; 066 alpha.addChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS); 067 } 068 069 public Alphabet getAlphabet() { 070 return alpha; 071 } 072 073 public int length() { 074 return length; 075 } 076 077 public synchronized Symbol symbolAt(int pos) { 078 int offset; 079 --pos; 080 081 if ((pos < currentMin) || (pos > currentMax)) { 082 // bad - we are outside the current chunk 083 int chnk = pos / chunkSize; 084 offset = pos % chunkSize; 085 086 currentMin = pos - offset; 087 currentMax = currentMin + chunkSize - 1; 088 currentChunk = chunks[chnk]; 089 } 090 else { 091 offset = pos - currentMin; 092 } 093 094 return currentChunk.symbolAt(offset + 1); 095 } 096 097 public SymbolList subList(int start, int end) { 098 if (start < 1 || end > length()) { 099 throw new IndexOutOfBoundsException( 100 "Sublist index out of bounds " + length() + ":" + start + "," + end 101 ); 102 } 103 104 if (end < start) { 105 throw new IllegalArgumentException( 106 "end must not be lower than start: start=" + start + ", end=" + end 107 ); 108 } 109 110 // 111 // Mildly optimized for case where from and to are within 112 // the same chunk. 113 // 114 115 int afrom = start - 1; 116 int ato = end - 1; 117 int cfrom = afrom / chunkSize; 118 if (ato / chunkSize == cfrom) { 119 return chunks[cfrom].subList((afrom % chunkSize) + 1, (ato % chunkSize) + 1); 120 } else { 121 return super.subList(start, end); 122 } 123 } 124 125 public void edit(Edit edit) throws IllegalAlphabetException, 126 ChangeVetoException { 127 ChangeSupport cs; 128 ChangeEvent cevt; 129 Symbol[] dest; 130 131 // first make sure that it is in bounds 132 if ((edit.pos + edit.length > length +1 ) || (edit.pos <= 0) || edit.length < 0){ 133 throw new IndexOutOfBoundsException(); 134 } 135 // make sure that the symbolList is of the correct alphabet 136 if (( edit.replacement.getAlphabet() != getAlphabet()) && (edit.replacement != SymbolList.EMPTY_LIST)){ 137 throw new IllegalAlphabetException(); 138 } 139 140 // give the listeners a change to veto this 141 // create a new change event ->the EDIT is a static final variable of type ChangeType in SymbolList interface 142 cevt = new ChangeEvent(this, SymbolList.EDIT, edit); 143 cs = getChangeSupport(SymbolList.EDIT); 144 synchronized(cs) { 145 // let the listeners know what we want to do 146 cs.firePreChangeEvent(cevt); 147 148 // now for the edit 149 int posRightFragInSourceArray = edit.pos + edit.length - 1; 150 int rightFragLength = length - posRightFragInSourceArray; 151 int posRightFragInDestArray = posRightFragInSourceArray + edit.replacement.length() - edit.length; 152 int posReplaceFragInDestArray = edit.pos - 1; 153 int replaceFragLength = edit.replacement.length(); 154 int newLength = length + replaceFragLength - edit.length; 155 156 // extend the array 157 dest = new Symbol[newLength]; 158 159 // copy symbols before the edit and make sure we are not editing the edit at the same time (hoops!) 160 int leftFragLength = edit.pos - 1; 161 for (int i = 0; i < chunks.length; i++) { 162 SymbolList chunk = chunks[i]; 163 Symbol[] chunkSymbol = ((SimpleSymbolList) chunk).getSymbolArray(); 164 int desStart = i * chunkSize; 165 int desEnd = i * chunkSize + chunk.length(); 166 if (desEnd >= leftFragLength) { 167 desEnd = leftFragLength; 168 System.arraycopy(chunkSymbol, 0, dest, desStart, desEnd - desStart); 169 break; 170 } 171 System.arraycopy(chunkSymbol, 0, dest, desStart, desEnd - desStart); 172 } 173 174 // copy the symbols after the edit 175 if (rightFragLength > 0) { 176 int chunkNum = posRightFragInSourceArray / chunkSize; 177 int chunkStart = posRightFragInSourceArray % chunkSize; 178 int chunkCopied = 0; 179 for (int i = chunkNum; i < chunks.length; i++) { 180 SymbolList chunk = chunks[i]; 181 Symbol[] chunkSymbol = ((SimpleSymbolList) chunk).getSymbolArray(); 182 int desStart = posRightFragInDestArray + chunkCopied; 183 int desLength = chunk.length() - chunkStart; 184 int left = rightFragLength - chunkCopied; 185 if (left == 0) { 186 break; 187 } 188 if (desLength > left) { 189 desLength = left; 190 } 191 System.arraycopy(chunkSymbol, chunkStart, dest, desStart, desLength); 192 if (chunkStart != 0) { 193 chunkStart = 0; 194 } 195 chunkCopied += desLength; 196 } 197 } 198 199 // copy the symbols within the edit 200 for (int i = 1; i <= replaceFragLength; i++) { 201 dest[posReplaceFragInDestArray + i - 1] = edit.replacement.symbolAt(i); 202 } 203 204 //Make the chunks 205 int chunkNum = newLength/chunkSize; 206 int chunkLeft = newLength%chunkSize; 207 int chunkReused = (edit.pos-1)/chunkSize; 208 if(chunkLeft>0) chunkNum++; 209 SymbolList [] symListArray = new SymbolList[chunkNum]; 210 for(int i=0;i<chunkNum-1;i++) { 211 if(i<chunkReused) { 212 symListArray[i] = chunks[i]; 213 continue; 214 } 215 Symbol[] chunk = new Symbol[chunkSize]; 216 System.arraycopy(dest, i*chunkSize, chunk, 0, chunkSize); 217 symListArray[i] = new SimpleSymbolList(chunk, chunkSize, getAlphabet()); 218 } 219 if(chunkLeft>0) { 220 Symbol[] chunk = new Symbol[chunkLeft]; 221 System.arraycopy(dest, (chunkNum-1)*chunkSize, chunk, 0, chunkLeft); 222 symListArray[chunkNum-1] = new SimpleSymbolList(chunk, chunkLeft, getAlphabet()); 223 } else { 224 Symbol[] chunk = new Symbol[chunkSize]; 225 System.arraycopy(dest, (chunkNum-1)*chunkSize, chunk, 0, chunkSize); 226 symListArray[chunkNum-1] = new SimpleSymbolList(chunk, chunkSize, getAlphabet()); 227 } 228 229 chunks = symListArray; 230 length = newLength; 231 currentMin = Integer.MAX_VALUE; 232 currentMax = Integer.MIN_VALUE; 233 currentChunk = null; 234 235 cs.firePostChangeEvent(cevt); 236 } 237 } 238}