001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on June 14, 2010 021 * Author: Mark Chapman 022 * @author Paolo Pavan 023 */ 024 025package org.biojava.nbio.core.alignment; 026 027import org.biojava.nbio.core.alignment.template.AlignedSequence; 028import org.biojava.nbio.core.sequence.AccessionID; 029import org.biojava.nbio.core.sequence.Strand; 030import org.biojava.nbio.core.sequence.location.SimpleLocation; 031import org.biojava.nbio.core.sequence.location.template.Location; 032import org.biojava.nbio.core.sequence.location.template.Point; 033import org.biojava.nbio.core.sequence.template.*; 034import org.biojava.nbio.core.util.Equals; 035 036import java.io.Serializable; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.Iterator; 040import java.util.List; 041 042/** 043 * Implements a data structure for a {@link Sequence} within an alignment. 044 * 045 * @author Mark Chapman 046 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C 047 */ 048public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> implements Serializable, AlignedSequence<S, C> { 049 050 051 private static final long serialVersionUID = 1L; 052 053 private static final String gap = "-"; 054 055 // always stored 056 private AlignedSequence<S, C> prev; 057 private S original; 058 private int length, numBefore, numAfter; 059 private Location location; 060 061 // cached (lazily initialized) 062 private int numGaps = -1; 063 private int numGapPositions = -1; 064 065 private int[] alignmentFromSequence; 066 private int[] sequenceFromAlignment; 067 068 /** 069 * Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment. 070 * 071 * @param original the original {@link Sequence} before alignment 072 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 073 * @throws IllegalArgumentException if given sequence does not fit in alignment 074 */ 075 public SimpleAlignedSequence(S original, List<Step> steps) { 076 this(original, steps, 0, 0); 077 } 078 079 /** 080 * Creates an {@link AlignedSequence} for the given {@link Sequence} in a local alignment. 081 * 082 * @param original the original {@link Sequence} before alignment 083 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 084 * @param numBefore number of {@link Compound}s before a local alignment 085 * @param numAfter number of {@link Compound}s after a local alignment 086 * @throws IllegalArgumentException if given sequence does not fit in alignment 087 */ 088 public SimpleAlignedSequence(S original, List<Step> steps, int numBefore, int numAfter) { 089 this.original = original; 090 this.numBefore = numBefore; 091 this.numAfter = numAfter; 092 length = steps.size(); 093 setLocation(steps); 094 } 095 096 /** 097 * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a global alignment. 098 * 099 * @param prev the previous {@link AlignedSequence} before this alignment 100 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 101 * @throws IllegalArgumentException if given sequence does not fit in alignment 102 */ 103 public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps) { 104 this(prev, steps, 0, 0); 105 } 106 107 /** 108 * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a local alignment. 109 * 110 * @param prev the previous {@link AlignedSequence} before this alignment 111 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 112 * @param numBefore number of {@link Compound}s before a local alignment 113 * @param numAfter number of {@link Compound}s after a local alignment 114 * @throws IllegalArgumentException if given sequence does not fit in alignment 115 */ 116 public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps, int numBefore, int numAfter) { 117 this.prev = prev; 118 this.original = prev.getOriginalSequence(); 119 this.numBefore = numBefore; 120 this.numAfter = numAfter; 121 if (prev instanceof SimpleAlignedSequence<?, ?>) { 122 SimpleAlignedSequence<?, ?> p = (SimpleAlignedSequence<?, ?>) prev; 123 this.numBefore += p.numBefore; 124 this.numAfter += p.numAfter; 125 } 126 length = steps.size(); 127 setLocation(steps); 128 } 129 130 // methods for AlignedSequence 131 132 @Override 133 public void clearCache() { 134 alignmentFromSequence = null; 135 sequenceFromAlignment = null; 136 } 137 138 private void setAlignmentFromSequence() { 139 alignmentFromSequence = new int[original.getLength()]; 140 int s = 1, a = 1; 141 for (int i = 0; i < numBefore; i++, s++) { 142 alignmentFromSequence[s - 1] = a; 143 } 144 for (; s <= alignmentFromSequence.length && a <= length; s++, a++) { 145 while (a <= length && isGap(a)) { 146 a++; 147 } 148 alignmentFromSequence[s - 1] = a; 149 } 150 a--; 151 for (int i = 0; i < numAfter; i++, s++) { 152 alignmentFromSequence[s - 1] = a; 153 } 154 } 155 156 @Override 157 public int[] getAlignmentFromSequence() { 158 if (alignmentFromSequence == null) 159 setAlignmentFromSequence(); 160 return alignmentFromSequence; 161 } 162 163 @Override 164 public int getAlignmentIndexAt(int sequenceIndex) { 165 if (alignmentFromSequence == null) 166 setAlignmentFromSequence(); 167 return alignmentFromSequence[sequenceIndex - 1]; 168 } 169 170 @Override 171 public Point getEnd() { 172 return location.getEnd(); 173 } 174 175 @Override 176 public Location getLocationInAlignment() { 177 return location; 178 } 179 180 @Override 181 public int getNumGaps() { 182 if (numGaps == -1) { 183 numGaps = 0; 184 numGapPositions = 0; 185 C cGap = getCompoundSet().getCompoundForString(gap); 186 boolean inGap = false; 187 for (C compound : getAsList()) { 188 if (compound == null || compound.equalsIgnoreCase(cGap)) { 189 if (!inGap) { 190 numGaps++; 191 inGap = true; 192 } 193 numGapPositions++; 194 } else { 195 inGap = false; 196 } 197 } 198 } 199 return numGaps; 200 } 201 202 @Override 203 public S getOriginalSequence() { 204 return original; 205 } 206 207 @Override 208 public int getOverlapCount() { 209 // TODO handle circular alignments 210 return 1; 211 } 212 213 private void setSequenceFromAlignment() { 214 sequenceFromAlignment = new int[length]; 215 int a = 1, s = numBefore + 1; 216 for (int i = 0; i < getStart().getPosition(); i++, a++) { 217 sequenceFromAlignment[a - 1] = s; 218 } 219 for (; a <= length; a++) { 220 if (!isGap(a)) { 221 s++; 222 } 223 sequenceFromAlignment[a - 1] = s; 224 } 225 } 226 227 @Override 228 public int[] getSequenceFromAlignment() { 229 if (sequenceFromAlignment == null) 230 setSequenceFromAlignment(); 231 return sequenceFromAlignment; 232 } 233 234 @Override 235 public int getSequenceIndexAt(int alignmentIndex) { 236 if (sequenceFromAlignment == null) 237 setSequenceFromAlignment(); 238 return sequenceFromAlignment[alignmentIndex - 1]; 239 } 240 241 @Override 242 public Point getStart() { 243 return location.getStart(); 244 } 245 246 @Override 247 public boolean isCircular() { 248 return location.isCircular(); 249 } 250 251 @Override 252 public boolean isGap(int alignmentIndex) { 253 if (getStart().getPosition() <= alignmentIndex && alignmentIndex <= getEnd().getPosition()) { 254 if (!location.isComplex()) { 255 return false; 256 } 257 for (Location sublocation : location) { 258 if (sublocation.getStart().getPosition() <= alignmentIndex && 259 alignmentIndex <= sublocation.getEnd().getPosition()) { 260 return false; 261 } 262 } 263 } 264 return true; 265 } 266 267 // methods for Sequence 268 269 @Override 270 public int countCompounds(C... compounds) { 271 int count = 0; 272 List<C> search = Arrays.asList(compounds); 273 for (C compound : getAsList()) { 274 if (search.contains(compound)) { 275 count++; 276 } 277 } 278 return count; 279 } 280 281 @Override 282 public AccessionID getAccession() { 283 return original.getAccession(); 284 } 285 286 @Override 287 public List<C> getAsList() { 288 List<C> compounds = new ArrayList<C>(); 289 for (int i = 1; i <= length; i++) { 290 compounds.add(getCompoundAt(i)); 291 } 292 return compounds; 293 } 294 295 @Override 296 public boolean equals(Object o){ 297 298 if(! Equals.classEqual(this, o)) { 299 return false; 300 } 301 302 Sequence<C> other = (Sequence<C>)o; 303 if ( original.getAsList().size() != other.getAsList().size()) 304 return false; 305 306 for ( int i = 0 ; i< original.getAsList().size() ; i++){ 307 if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i))) 308 return false; 309 } 310 return true; 311 } 312 313 @Override 314 public int hashCode(){ 315 String s = getSequenceAsString(); 316 return s.hashCode(); 317 } 318 319 @Override 320 public C getCompoundAt(int alignmentIndex) { 321 return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ? 322 getCompoundSet().getCompoundForString(gap) : 323 original.getCompoundAt(getSequenceIndexAt(alignmentIndex)); 324 } 325 326 @Override 327 public CompoundSet<C> getCompoundSet() { 328 return original.getCompoundSet(); 329 } 330 331 @Override 332 public int getIndexOf(C compound) { 333 for (int i = 1; i <= length; i++) { 334 if (compound.equals(getCompoundAt(i))) { 335 return i; 336 } 337 } 338 return -1; 339 } 340 341 @Override 342 public int getLastIndexOf(C compound) { 343 for (int i = length; i >= 1; i--) { 344 if (compound.equals(getCompoundAt(i))) { 345 return i; 346 } 347 } 348 return -1; 349 } 350 351 @Override 352 public int getLength() { 353 return length; 354 } 355 356 @Override 357 public String getSequenceAsString() { 358 return SequenceMixin.toString(this); 359 } 360 361 @Override 362 public SequenceView<C> getSubSequence(Integer start, Integer end) { 363 return SequenceMixin.createSubSequence(this, start, end); 364 } 365 366 // method for Iterable 367 368 @Override 369 public Iterator<C> iterator() { 370 return getAsList().iterator(); 371 } 372 373 // method from Object 374 375 /** 376 * Provides standard Java language access to results of {@link #getSequenceAsString()}. 377 */ 378 @Override 379 public String toString() { 380 return getSequenceAsString(); 381 } 382 383 // helper method to initialize the location 384 private void setLocation(List<Step> steps) { 385 List<Location> sublocations = new ArrayList<Location>(); 386 int start = 0, step = 0, oStep = numBefore+numAfter, oMax = this.original.getLength(), pStep = 0, pMax = 387 (prev == null) ? 0 : prev.getLength(); 388 boolean inGap = true; 389 390 // build sublocations: pieces of sequence separated by gaps 391 for (; step < length; step++) { 392 boolean isGapStep = (steps.get(step) == Step.GAP), 393 isGapPrev = (pStep < pMax && prev.isGap(pStep + 1)); 394 if (!isGapStep && !isGapPrev) { 395 oStep++; 396 if (inGap) { 397 inGap = false; 398 start = step + 1; 399 } 400 } else if (!inGap) { 401 inGap = true; 402 sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED)); 403 } 404 if (prev != null && !isGapStep) { 405 pStep++; 406 } 407 } 408 if (!inGap) { 409 sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED)); 410 } 411 412 // combine sublocations into 1 Location 413 if (sublocations.size() == 0) { 414 location = null; 415 } else if (sublocations.size() == 1) { 416 location = sublocations.get(0); 417 } else { 418 location = new SimpleLocation(sublocations.get(0).getStart(), sublocations.get(sublocations.size() - 1).getEnd(), 419 Strand.UNDEFINED, 420 false, sublocations); 421 } 422 // TODO handle circular alignments 423 424 // check that alignment has correct number of compounds in it to fit original sequence 425 if (step != length || oStep != oMax || pStep != pMax) { 426 throw new IllegalArgumentException("Given sequence does not fit in alignment."); 427 } 428 } 429 430 @Override 431 //TODO Needs to implements 432 public SequenceView<C> getInverse() { 433 throw new UnsupportedOperationException("Not supported yet."); 434 } 435 436 @Override 437 public int getNumGapPositions() { 438 if (numGapPositions == -1) 439 getNumGaps(); 440 return numGapPositions; 441 } 442 443 @Override 444 public double getCoverage() { 445 446 double coverage = getLength() - getNumGapPositions(); 447 return coverage / getOriginalSequence().getLength(); 448 } 449}