001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.sequence.storage; 022 023import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 024import org.biojava.nbio.core.sequence.AccessionID; 025import org.biojava.nbio.core.sequence.template.*; 026import org.biojava.nbio.core.util.Equals; 027 028import java.util.*; 029 030/** 031 * This reader actually proxies onto multiple types of sequence in order 032 * to allow a number of sequence objects to act as if they are one sequence. 033 * The code takes in any number of sequences, records the minimum and maximum 034 * bounds each sequence covers with respect to 1 position indexing and then 035 * binary searches these when a position is requested. Because of this 036 * 0 length Sequences are excluded during construction. 037 * 038 * Performance is not as good as if you are using a flat sequence however the 039 * speed of lookup is more than adaquate for most situations. Using the iterator 040 * gives the best performance as this does not rely on the binary search 041 * mechanism instead iterating through each sequence in turn. 042 * 043 * @author ayates 044 * @param <C> Tyoe of compound to hold 045 */ 046public class JoiningSequenceReader<C extends Compound> implements ProxySequenceReader<C> { 047 048 /** 049 * Internal implementation flag and controls how we look for the right 050 * sub-sequence 051 */ 052 private static final boolean BINARY_SEARCH = true; 053 private final List<Sequence<C>> sequences; 054 private final CompoundSet<C> compoundSet; 055 private int[] maxSequenceIndex; 056 private int[] minSequenceIndex; 057 058 /** 059 * Allows creation of the store from Vargs Sequence<C> objects. CompoundSet 060 * defaults to the first element of the array (assuming there are elements 061 * available during construction otherwise we will throw an illegal 062 * state exception). 063 */ 064 public JoiningSequenceReader(Sequence<C>... sequences) { 065 this(Arrays.asList(sequences)); 066 } 067 068 /** 069 * Allows creation of the store from List<Sequence<C>>. CompoundSet 070 * defaults to the first element of the List (assuming there are elements 071 * available during construction otherwise we will throw an illegal 072 * state exception). 073 */ 074 public JoiningSequenceReader(List<Sequence<C>> sequences) { 075 this.sequences = grepSequences(sequences); 076 this.compoundSet = grepCompoundSet(); 077 } 078 079 public JoiningSequenceReader(CompoundSet<C> compoundSet, Sequence<C>... sequences) { 080 this(compoundSet, Arrays.asList(sequences)); 081 } 082 083 public JoiningSequenceReader(CompoundSet<C> compoundSet, List<Sequence<C>> sequences) { 084 this.sequences = grepSequences(sequences); 085 this.compoundSet = compoundSet; 086 } 087 088 private List<Sequence<C>> grepSequences(List<Sequence<C>> sequences) { 089 List<Sequence<C>> seqs = new ArrayList<Sequence<C>>(); 090 for (Sequence<C> s : sequences) { 091 if (s.getLength() != 0) { 092 seqs.add(s); 093 } 094 } 095 return seqs; 096 } 097 098 private CompoundSet<C> grepCompoundSet() { 099 if (sequences.isEmpty()) { 100 throw new IllegalStateException("Cannot get a CompoundSet because we have no sequences. Set during construction"); 101 } 102 return sequences.get(0).getCompoundSet(); 103 } 104 105 106 @Override 107 public C getCompoundAt(int position) { 108 int sequenceIndex = getSequenceIndex(position); 109 Sequence<C> sequence = sequences.get(sequenceIndex); 110 int indexInSequence = (position - getMinSequenceIndex()[sequenceIndex]) + 1; 111 return sequence.getCompoundAt(indexInSequence); 112 } 113 114 115 @Override 116 public CompoundSet<C> getCompoundSet() { 117 return compoundSet; 118 } 119 120 121 @Override 122 public int getLength() { 123 int[] maxSeqIndex = getMaxSequenceIndex(); 124 if (maxSeqIndex.length == 0) { 125 return 0; 126 } 127 return maxSeqIndex[maxSeqIndex.length - 1]; 128 } 129 130 /** 131 * Returns which Sequence holds the position queried for 132 */ 133 private int getSequenceIndex(int position) { 134 if (BINARY_SEARCH) { 135 return binarySearch(position); 136 } else { 137 return linearSearch(position); 138 } 139 } 140 141 private int[] getMinSequenceIndex() { 142 if (minSequenceIndex == null) { 143 initSeqIndexes(); 144 } 145 return minSequenceIndex; 146 } 147 148 private int[] getMaxSequenceIndex() { 149 if (maxSequenceIndex == null) { 150 initSeqIndexes(); 151 } 152 return maxSequenceIndex; 153 } 154 155 private void initSeqIndexes() { 156 minSequenceIndex = new int[sequences.size()]; 157 maxSequenceIndex = new int[sequences.size()]; 158 int currentMaxIndex = 0; 159 int currentMinIndex = 1; 160 int lastLength = 0; 161 for (int i = 0; i < sequences.size(); i++) { 162 currentMinIndex += lastLength; 163 currentMaxIndex += sequences.get(i).getLength(); 164 minSequenceIndex[i] = currentMinIndex; 165 maxSequenceIndex[i] = currentMaxIndex; 166 lastLength = sequences.get(i).getLength(); 167 } 168 } 169 170 /** 171 * Scans through the sequence index arrays in linear time. Not the best 172 * performance but easier to code 173 */ 174 private int linearSearch(int position) { 175 int[] minSeqIndex = getMinSequenceIndex(); 176 int[] maxSeqIndex = getMaxSequenceIndex(); 177 int length = minSeqIndex.length; 178 for (int i = 0; i < length; i++) { 179 if (position >= minSeqIndex[i] && position <= maxSeqIndex[i]) { 180 return i; 181 } 182 } 183 throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence"); 184 } 185 186 /** 187 * Scans through the sequence index arrays using binary search 188 */ 189 private int binarySearch(int position) { 190 int[] minSeqIndex = getMinSequenceIndex(); 191 int[] maxSeqIndex = getMaxSequenceIndex(); 192 193 int low = 0; 194 int high = minSeqIndex.length - 1; 195 while (low <= high) { 196 //Go to the mid point in the array 197 int mid = (low + high) >>> 1; 198 199 //Get the max position represented by this Sequence 200 int midMinPosition = minSeqIndex[mid]; 201 int midMaxPosition = maxSeqIndex[mid]; 202 203 //if current position is greater than the current bounds then 204 //increase search space 205 if (midMinPosition < position && midMaxPosition < position) { 206 low = mid + 1; 207 } //if current position is less than current bounds then decrease 208 //search space 209 else if (midMinPosition > position && midMaxPosition > position) { 210 high = mid - 1; 211 } else { 212 return mid; 213 } 214 } 215 throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence"); 216 } 217 218 /** 219 * Iterator implementation which attempts to move through the 2D structure 220 * attempting to skip onto the next sequence as & when it is asked to 221 */ 222 223 @Override 224 public Iterator<C> iterator() { 225 final List<Sequence<C>> localSequences = sequences; 226 return new Iterator<C>() { 227 228 private Iterator<C> currentSequenceIterator = null; 229 private int currentPosition = 0; 230 231 232 @Override 233 public boolean hasNext() { 234 //If the current iterator is null then see if the Sequences object has anything 235 if (currentSequenceIterator == null) { 236 return !localSequences.isEmpty(); 237 } 238 239 //See if we had any compounds 240 boolean hasNext = currentSequenceIterator.hasNext(); 241 if (!hasNext) { 242 hasNext = currentPosition < sequences.size(); 243 } 244 return hasNext; 245 } 246 247 248 @Override 249 public C next() { 250 if (currentSequenceIterator == null) { 251 if (localSequences.isEmpty()) { 252 throw new NoSuchElementException("No sequences to iterate over; make sure you call hasNext() before next()"); 253 } 254 currentSequenceIterator = localSequences.get(currentPosition).iterator(); 255 currentPosition++; 256 } 257 if (!currentSequenceIterator.hasNext()) { 258 currentSequenceIterator = localSequences.get(currentPosition).iterator(); 259 currentPosition++; 260 } 261 return currentSequenceIterator.next(); 262 } 263 264 265 @Override 266 public void remove() throws UnsupportedOperationException { 267 throw new UnsupportedOperationException("Cannot remove from this Sequence"); 268 } 269 }; 270 } 271 272 273 @Override 274 public void setCompoundSet(CompoundSet<C> compoundSet) { 275 throw new UnsupportedOperationException(); 276 } 277 278 279 @Override 280 public void setContents(String sequence) throws CompoundNotFoundException { 281 throw new UnsupportedOperationException(); 282 } 283 284 285 @Override 286 public int countCompounds(C... compounds) { 287 return SequenceMixin.countCompounds(this, compounds); 288 } 289 290 291 @Override 292 public AccessionID getAccession() throws UnsupportedOperationException { 293 throw new UnsupportedOperationException(); 294 } 295 296 297 @Override 298 public List<C> getAsList() { 299 return SequenceMixin.toList(this); 300 } 301 302 @Override 303 public boolean equals(Object o) { 304 305 if(! Equals.classEqual(this, o)) { 306 return false; 307 } 308 309 Sequence<C> other = (Sequence<C>)o; 310 if ( other.getCompoundSet() != getCompoundSet()) 311 return false; 312 313 List<C> rawCompounds = getAsList(); 314 List<C> otherCompounds = other.getAsList(); 315 316 if ( rawCompounds.size() != otherCompounds.size()) 317 return false; 318 319 for (int i = 0 ; i < rawCompounds.size() ; i++){ 320 Compound myCompound = rawCompounds.get(i); 321 Compound otherCompound = otherCompounds.get(i); 322 if ( ! myCompound.equalsIgnoreCase(otherCompound)) 323 return false; 324 } 325 return true; 326 } 327 328 @Override 329 public int hashCode(){ 330 String s = getSequenceAsString(); 331 return s.hashCode(); 332 } 333 334 @Override 335 public int getIndexOf(C compound) { 336 return SequenceMixin.indexOf(this, compound); 337 } 338 339 340 @Override 341 public int getLastIndexOf(C compound) { 342 return SequenceMixin.lastIndexOf(this, compound); 343 } 344 345 346 @Override 347 public String getSequenceAsString() { 348 return SequenceMixin.toStringBuilder(this).toString(); 349 } 350 351 352 @Override 353 public SequenceView<C> getSubSequence(Integer start, Integer end) { 354 return SequenceMixin.createSubSequence(this, start, end); 355 } 356 357 @Override 358 public SequenceView<C> getInverse() { 359 return SequenceMixin.inverse(this); 360 } 361}