001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on June 14, 2010 021 * Author: Mark Chapman 022 * @author Paolo Pavan 023 */ 024 025package org.biojava.nbio.core.alignment; 026 027import org.biojava.nbio.core.alignment.template.AlignedSequence; 028import org.biojava.nbio.core.sequence.AccessionID; 029import org.biojava.nbio.core.sequence.Strand; 030import org.biojava.nbio.core.sequence.location.SimpleLocation; 031import org.biojava.nbio.core.sequence.location.template.Location; 032import org.biojava.nbio.core.sequence.location.template.Point; 033import org.biojava.nbio.core.sequence.template.*; 034import org.biojava.nbio.core.util.Equals; 035 036import java.io.Serializable; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.Iterator; 040import java.util.List; 041 042/** 043 * Implements a data structure for a {@link Sequence} within an alignment. 044 * 045 * @author Mark Chapman 046 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C 047 * @param <S> the sequence type 048 */ 049public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> implements Serializable, AlignedSequence<S, C> { 050 051 052 private static final long serialVersionUID = 1L; 053 054 private static final String gap = "-"; 055 056 // always stored 057 private AlignedSequence<S, C> prev; 058 private S original; 059 private int length, numBefore, numAfter; 060 private Location location; 061 062 // cached (lazily initialized) 063 private int numGaps = -1; 064 private int numGapPositions = -1; 065 066 private int[] alignmentFromSequence; 067 private int[] sequenceFromAlignment; 068 069 /** 070 * Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment. 071 * 072 * @param original the original {@link Sequence} before alignment 073 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 074 * @throws IllegalArgumentException if given sequence does not fit in alignment 075 */ 076 public SimpleAlignedSequence(S original, List<Step> steps) { 077 this(original, steps, 0, 0); 078 } 079 080 /** 081 * Creates an {@link AlignedSequence} for the given {@link Sequence} in a local alignment. 082 * 083 * @param original the original {@link Sequence} before alignment 084 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 085 * @param numBefore number of {@link Compound}s before a local alignment 086 * @param numAfter number of {@link Compound}s after a local alignment 087 * @throws IllegalArgumentException if given sequence does not fit in alignment 088 */ 089 public SimpleAlignedSequence(S original, List<Step> steps, int numBefore, int numAfter) { 090 this.original = original; 091 this.numBefore = numBefore; 092 this.numAfter = numAfter; 093 length = steps.size(); 094 setLocation(steps); 095 } 096 097 /** 098 * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a global alignment. 099 * 100 * @param prev the previous {@link AlignedSequence} before this alignment 101 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 102 * @throws IllegalArgumentException if given sequence does not fit in alignment 103 */ 104 public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps) { 105 this(prev, steps, 0, 0); 106 } 107 108 /** 109 * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a local alignment. 110 * 111 * @param prev the previous {@link AlignedSequence} before this alignment 112 * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment 113 * @param numBefore number of {@link Compound}s before a local alignment 114 * @param numAfter number of {@link Compound}s after a local alignment 115 * @throws IllegalArgumentException if given sequence does not fit in alignment 116 */ 117 public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps, int numBefore, int numAfter) { 118 this.prev = prev; 119 this.original = prev.getOriginalSequence(); 120 this.numBefore = numBefore; 121 this.numAfter = numAfter; 122 if (prev instanceof SimpleAlignedSequence<?, ?>) { 123 SimpleAlignedSequence<?, ?> p = (SimpleAlignedSequence<?, ?>) prev; 124 this.numBefore += p.numBefore; 125 this.numAfter += p.numAfter; 126 } 127 length = steps.size(); 128 setLocation(steps); 129 } 130 131 // methods for AlignedSequence 132 133 @Override 134 public void clearCache() { 135 alignmentFromSequence = null; 136 sequenceFromAlignment = null; 137 } 138 139 private void setAlignmentFromSequence() { 140 alignmentFromSequence = new int[original.getLength()]; 141 int s = 1, a = 1; 142 for (int i = 0; i < numBefore; i++, s++) { 143 alignmentFromSequence[s - 1] = a; 144 } 145 for (; s <= alignmentFromSequence.length && a <= length; s++, a++) { 146 while (a <= length && isGap(a)) { 147 a++; 148 } 149 alignmentFromSequence[s - 1] = a; 150 } 151 a--; 152 for (int i = 0; i < numAfter; i++, s++) { 153 alignmentFromSequence[s - 1] = a; 154 } 155 } 156 157 @Override 158 public int[] getAlignmentFromSequence() { 159 if (alignmentFromSequence == null) 160 setAlignmentFromSequence(); 161 return alignmentFromSequence; 162 } 163 164 @Override 165 public int getAlignmentIndexAt(int sequenceIndex) { 166 if (alignmentFromSequence == null) 167 setAlignmentFromSequence(); 168 return alignmentFromSequence[sequenceIndex - 1]; 169 } 170 171 @Override 172 public Point getEnd() { 173 return location.getEnd(); 174 } 175 176 @Override 177 public Location getLocationInAlignment() { 178 return location; 179 } 180 181 @Override 182 public int getNumGaps() { 183 if (numGaps == -1) { 184 numGaps = 0; 185 numGapPositions = 0; 186 C cGap = getCompoundSet().getCompoundForString(gap); 187 boolean inGap = false; 188 for (C compound : getAsList()) { 189 if (compound == null || compound.equalsIgnoreCase(cGap)) { 190 if (!inGap) { 191 numGaps++; 192 inGap = true; 193 } 194 numGapPositions++; 195 } else { 196 inGap = false; 197 } 198 } 199 } 200 return numGaps; 201 } 202 203 @Override 204 public S getOriginalSequence() { 205 return original; 206 } 207 208 @Override 209 public int getOverlapCount() { 210 // TODO handle circular alignments 211 return 1; 212 } 213 214 private void setSequenceFromAlignment() { 215 sequenceFromAlignment = new int[length]; 216 int a = 1, s = numBefore + 1; 217 for (int i = 0; i < getStart().getPosition(); i++, a++) { 218 sequenceFromAlignment[a - 1] = s; 219 } 220 for (; a <= length; a++) { 221 if (!isGap(a)) { 222 s++; 223 } 224 sequenceFromAlignment[a - 1] = s; 225 } 226 } 227 228 @Override 229 public int[] getSequenceFromAlignment() { 230 if (sequenceFromAlignment == null) 231 setSequenceFromAlignment(); 232 return sequenceFromAlignment; 233 } 234 235 @Override 236 public int getSequenceIndexAt(int alignmentIndex) { 237 if (sequenceFromAlignment == null) 238 setSequenceFromAlignment(); 239 return sequenceFromAlignment[alignmentIndex - 1]; 240 } 241 242 @Override 243 public Point getStart() { 244 return location.getStart(); 245 } 246 247 @Override 248 public boolean isCircular() { 249 return location.isCircular(); 250 } 251 252 @Override 253 public boolean isGap(int alignmentIndex) { 254 if (getStart().getPosition() <= alignmentIndex && alignmentIndex <= getEnd().getPosition()) { 255 if (!location.isComplex()) { 256 return false; 257 } 258 for (Location sublocation : location) { 259 if (sublocation.getStart().getPosition() <= alignmentIndex && 260 alignmentIndex <= sublocation.getEnd().getPosition()) { 261 return false; 262 } 263 } 264 } 265 return true; 266 } 267 268 // methods for Sequence 269 270 @Override 271 public int countCompounds(C... compounds) { 272 int count = 0; 273 List<C> search = Arrays.asList(compounds); 274 for (C compound : getAsList()) { 275 if (search.contains(compound)) { 276 count++; 277 } 278 } 279 return count; 280 } 281 282 @Override 283 public AccessionID getAccession() { 284 return original.getAccession(); 285 } 286 287 @Override 288 public List<C> getAsList() { 289 List<C> compounds = new ArrayList<>(); 290 for (int i = 1; i <= length; i++) { 291 compounds.add(getCompoundAt(i)); 292 } 293 return compounds; 294 } 295 296 @Override 297 public boolean equals(Object o){ 298 299 if(! Equals.classEqual(this, o)) { 300 return false; 301 } 302 303 Sequence<C> other = (Sequence<C>)o; 304 if ( original.getAsList().size() != other.getAsList().size()) 305 return false; 306 307 for ( int i = 0 ; i< original.getAsList().size() ; i++){ 308 if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i))) 309 return false; 310 } 311 return true; 312 } 313 314 @Override 315 public int hashCode(){ 316 String s = getSequenceAsString(); 317 return s.hashCode(); 318 } 319 320 @Override 321 public C getCompoundAt(int alignmentIndex) { 322 return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ? 323 getCompoundSet().getCompoundForString(gap) : 324 original.getCompoundAt(getSequenceIndexAt(alignmentIndex)); 325 } 326 327 @Override 328 public CompoundSet<C> getCompoundSet() { 329 return original.getCompoundSet(); 330 } 331 332 @Override 333 public int getIndexOf(C compound) { 334 for (int i = 1; i <= length; i++) { 335 if (compound.equals(getCompoundAt(i))) { 336 return i; 337 } 338 } 339 return -1; 340 } 341 342 @Override 343 public int getLastIndexOf(C compound) { 344 for (int i = length; i >= 1; i--) { 345 if (compound.equals(getCompoundAt(i))) { 346 return i; 347 } 348 } 349 return -1; 350 } 351 352 @Override 353 public int getLength() { 354 return length; 355 } 356 357 @Override 358 public String getSequenceAsString() { 359 return SequenceMixin.toString(this); 360 } 361 362 @Override 363 public SequenceView<C> getSubSequence(Integer start, Integer end) { 364 return SequenceMixin.createSubSequence(this, start, end); 365 } 366 367 // method for Iterable 368 369 @Override 370 public Iterator<C> iterator() { 371 return getAsList().iterator(); 372 } 373 374 // method from Object 375 376 /** 377 * Provides standard Java language access to results of {@link #getSequenceAsString()}. 378 */ 379 @Override 380 public String toString() { 381 return getSequenceAsString(); 382 } 383 384 // helper method to initialize the location 385 private void setLocation(List<Step> steps) { 386 List<Location> sublocations = new ArrayList<>(); 387 int start = 0, step = 0, oStep = numBefore+numAfter, oMax = this.original.getLength(), pStep = 0, pMax = 388 (prev == null) ? 0 : prev.getLength(); 389 boolean inGap = true; 390 391 // build sublocations: pieces of sequence separated by gaps 392 for (; step < length; step++) { 393 boolean isGapStep = (steps.get(step) == Step.GAP), 394 isGapPrev = (pStep < pMax && prev.isGap(pStep + 1)); 395 if (!isGapStep && !isGapPrev) { 396 oStep++; 397 if (inGap) { 398 inGap = false; 399 start = step + 1; 400 } 401 } else if (!inGap) { 402 inGap = true; 403 sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED)); 404 } 405 if (prev != null && !isGapStep) { 406 pStep++; 407 } 408 } 409 if (!inGap) { 410 sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED)); 411 } 412 413 // combine sublocations into 1 Location 414 if (sublocations.size() == 0) { 415 location = null; 416 } else if (sublocations.size() == 1) { 417 location = sublocations.get(0); 418 } else { 419 location = new SimpleLocation(sublocations.get(0).getStart(), sublocations.get(sublocations.size() - 1).getEnd(), 420 Strand.UNDEFINED, 421 false, sublocations); 422 } 423 // TODO handle circular alignments 424 425 // check that alignment has correct number of compounds in it to fit original sequence 426 if (step != length || oStep != oMax || pStep != pMax) { 427 throw new IllegalArgumentException("Given sequence does not fit in alignment."); 428 } 429 } 430 431 @Override 432 //TODO Needs to implements 433 public SequenceView<C> getInverse() { 434 throw new UnsupportedOperationException("Not supported yet."); 435 } 436 437 @Override 438 public int getNumGapPositions() { 439 if (numGapPositions == -1) 440 getNumGaps(); 441 return numGapPositions; 442 } 443 444 @Override 445 public double getCoverage() { 446 447 double coverage = getLength() - getNumGapPositions(); 448 return coverage / getOriginalSequence().getLength(); 449 } 450}