001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 14, 2010
021 * Author: Mark Chapman
022 * @author Paolo Pavan
023 */
024
025package org.biojava.nbio.core.alignment;
026
027import org.biojava.nbio.core.alignment.template.AlignedSequence;
028import org.biojava.nbio.core.sequence.AccessionID;
029import org.biojava.nbio.core.sequence.Strand;
030import org.biojava.nbio.core.sequence.location.SimpleLocation;
031import org.biojava.nbio.core.sequence.location.template.Location;
032import org.biojava.nbio.core.sequence.location.template.Point;
033import org.biojava.nbio.core.sequence.template.*;
034import org.biojava.nbio.core.util.Equals;
035
036import java.io.Serializable;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.Iterator;
040import java.util.List;
041
042/**
043 * Implements a data structure for a {@link Sequence} within an alignment.
044 *
045 * @author Mark Chapman
046 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C
047 * @param <S> the sequence type
048 */
049public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> implements Serializable, AlignedSequence<S, C> {
050
051
052        private static final long serialVersionUID = 1L;
053
054        private static final String gap = "-";
055
056        // always stored
057        private AlignedSequence<S, C> prev;
058        private S original;
059        private int length, numBefore, numAfter;
060        private Location location;
061
062        // cached (lazily initialized)
063        private int numGaps = -1;
064        private int numGapPositions = -1;
065
066        private int[] alignmentFromSequence;
067        private int[] sequenceFromAlignment;
068
069        /**
070         * Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment.
071         *
072         * @param original the original {@link Sequence} before alignment
073         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
074         * @throws IllegalArgumentException if given sequence does not fit in alignment
075         */
076        public SimpleAlignedSequence(S original, List<Step> steps) {
077                this(original, steps, 0, 0);
078        }
079
080        /**
081         * Creates an {@link AlignedSequence} for the given {@link Sequence} in a local alignment.
082         *
083         * @param original the original {@link Sequence} before alignment
084         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
085         * @param numBefore number of {@link Compound}s before a local alignment
086         * @param numAfter number of {@link Compound}s after a local alignment
087         * @throws IllegalArgumentException if given sequence does not fit in alignment
088         */
089        public SimpleAlignedSequence(S original, List<Step> steps, int numBefore, int numAfter) {
090                this.original = original;
091                this.numBefore = numBefore;
092                this.numAfter = numAfter;
093                length = steps.size();
094                setLocation(steps);
095        }
096
097        /**
098         * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a global alignment.
099         *
100         * @param prev the previous {@link AlignedSequence} before this alignment
101         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
102         * @throws IllegalArgumentException if given sequence does not fit in alignment
103         */
104        public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps) {
105                this(prev, steps, 0, 0);
106        }
107
108        /**
109         * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a local alignment.
110         *
111         * @param prev the previous {@link AlignedSequence} before this alignment
112         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
113         * @param numBefore number of {@link Compound}s before a local alignment
114         * @param numAfter number of {@link Compound}s after a local alignment
115         * @throws IllegalArgumentException if given sequence does not fit in alignment
116         */
117        public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps, int numBefore, int numAfter) {
118                this.prev = prev;
119                this.original = prev.getOriginalSequence();
120                this.numBefore = numBefore;
121                this.numAfter = numAfter;
122                if (prev instanceof SimpleAlignedSequence<?, ?>) {
123                        SimpleAlignedSequence<?, ?> p = (SimpleAlignedSequence<?, ?>) prev;
124                        this.numBefore += p.numBefore;
125                        this.numAfter += p.numAfter;
126                }
127                length = steps.size();
128                setLocation(steps);
129        }
130
131        // methods for AlignedSequence
132
133        @Override
134        public void clearCache() {
135                alignmentFromSequence = null;
136                sequenceFromAlignment = null;
137        }
138
139        private void setAlignmentFromSequence() {
140                alignmentFromSequence = new int[original.getLength()];
141                int s = 1, a = 1;
142                for (int i = 0; i < numBefore; i++, s++) {
143                        alignmentFromSequence[s - 1] = a;
144                }
145                for (; s <= alignmentFromSequence.length && a <= length; s++, a++) {
146                        while (a <= length && isGap(a)) {
147                                a++;
148                        }
149                        alignmentFromSequence[s - 1] = a;
150                }
151                a--;
152                for (int i = 0; i < numAfter; i++, s++) {
153                        alignmentFromSequence[s - 1] = a;
154                }
155        }
156
157        @Override
158        public int[] getAlignmentFromSequence() {
159                if (alignmentFromSequence == null)
160                        setAlignmentFromSequence();
161                return alignmentFromSequence;
162        }
163
164        @Override
165        public int getAlignmentIndexAt(int sequenceIndex) {
166                if (alignmentFromSequence == null)
167                        setAlignmentFromSequence();
168                return alignmentFromSequence[sequenceIndex - 1];
169        }
170
171        @Override
172        public Point getEnd() {
173                return location.getEnd();
174        }
175
176        @Override
177        public Location getLocationInAlignment() {
178                return location;
179        }
180
181        @Override
182        public int getNumGaps() {
183                if (numGaps == -1) {
184                        numGaps = 0;
185                        numGapPositions = 0;
186                        C cGap = getCompoundSet().getCompoundForString(gap);
187                        boolean inGap = false;
188                        for (C compound : getAsList()) {
189                                if (compound == null || compound.equalsIgnoreCase(cGap)) {
190                                        if (!inGap) {
191                                                numGaps++;
192                                                inGap = true;
193                                        }
194                                        numGapPositions++;
195                                } else {
196                                        inGap = false;
197                                }
198                        }
199                }
200                return numGaps;
201        }
202
203        @Override
204        public S getOriginalSequence() {
205                return original;
206        }
207
208        @Override
209        public int getOverlapCount() {
210                // TODO handle circular alignments
211                return 1;
212        }
213
214        private void setSequenceFromAlignment() {
215                sequenceFromAlignment = new int[length];
216                int a = 1, s = numBefore + 1;
217                for (int i = 0; i < getStart().getPosition(); i++, a++) {
218                        sequenceFromAlignment[a - 1] = s;
219                }
220                for (; a <= length; a++) {
221                        if (!isGap(a)) {
222                                s++;
223                        }
224                        sequenceFromAlignment[a - 1] = s;
225                }
226        }
227
228        @Override
229        public int[] getSequenceFromAlignment() {
230                if (sequenceFromAlignment == null)
231                        setSequenceFromAlignment();
232                return sequenceFromAlignment;
233        }
234
235        @Override
236        public int getSequenceIndexAt(int alignmentIndex) {
237                if (sequenceFromAlignment == null)
238                        setSequenceFromAlignment();
239                return sequenceFromAlignment[alignmentIndex - 1];
240        }
241
242        @Override
243        public Point getStart() {
244                return location.getStart();
245        }
246
247        @Override
248        public boolean isCircular() {
249                return location.isCircular();
250        }
251
252        @Override
253        public boolean isGap(int alignmentIndex) {
254                if (getStart().getPosition() <= alignmentIndex && alignmentIndex <= getEnd().getPosition()) {
255                        if (!location.isComplex()) {
256                                return false;
257                        }
258                        for (Location sublocation : location) {
259                                if (sublocation.getStart().getPosition() <= alignmentIndex &&
260                                                alignmentIndex <= sublocation.getEnd().getPosition()) {
261                                        return false;
262                                }
263                        }
264                }
265                return true;
266        }
267
268        // methods for Sequence
269
270        @Override
271        public int countCompounds(C... compounds) {
272                int count = 0;
273                List<C> search = Arrays.asList(compounds);
274                for (C compound : getAsList()) {
275                        if (search.contains(compound)) {
276                                count++;
277                        }
278                }
279                return count;
280        }
281
282        @Override
283        public AccessionID getAccession() {
284                return original.getAccession();
285        }
286
287        @Override
288        public List<C> getAsList() {
289                List<C> compounds = new ArrayList<>();
290                for (int i = 1; i <= length; i++) {
291                        compounds.add(getCompoundAt(i));
292                }
293                return compounds;
294        }
295
296        @Override
297        public boolean equals(Object o){
298
299                if(! Equals.classEqual(this, o)) {
300                        return false;
301                }
302
303                Sequence<C> other = (Sequence<C>)o;
304                if ( original.getAsList().size() != other.getAsList().size())
305                        return false;
306
307                for ( int i = 0 ; i< original.getAsList().size() ; i++){
308                        if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i)))
309                                return false;
310                }
311                return true;
312        }
313
314        @Override
315        public int hashCode(){
316                String s = getSequenceAsString();
317                return s.hashCode();
318        }
319
320        @Override
321        public C getCompoundAt(int alignmentIndex) {
322                return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ?
323                                getCompoundSet().getCompoundForString(gap) :
324                                original.getCompoundAt(getSequenceIndexAt(alignmentIndex));
325        }
326
327        @Override
328        public CompoundSet<C> getCompoundSet() {
329                return original.getCompoundSet();
330        }
331
332        @Override
333        public int getIndexOf(C compound) {
334                for (int i = 1; i <= length; i++) {
335                        if (compound.equals(getCompoundAt(i))) {
336                                return i;
337                        }
338                }
339                return -1;
340        }
341
342        @Override
343        public int getLastIndexOf(C compound) {
344                for (int i = length; i >= 1; i--) {
345                        if (compound.equals(getCompoundAt(i))) {
346                                return i;
347                        }
348                }
349                return -1;
350        }
351
352        @Override
353        public int getLength() {
354                return length;
355        }
356
357        @Override
358        public String getSequenceAsString() {
359                return SequenceMixin.toString(this);
360        }
361
362        @Override
363        public SequenceView<C> getSubSequence(Integer start, Integer end) {
364                return SequenceMixin.createSubSequence(this, start, end);
365        }
366
367        // method for Iterable
368
369        @Override
370        public Iterator<C> iterator() {
371                return getAsList().iterator();
372        }
373
374        // method from Object
375
376        /**
377         * Provides standard Java language access to results of {@link #getSequenceAsString()}.
378         */
379        @Override
380        public String toString() {
381                return getSequenceAsString();
382        }
383
384        // helper method to initialize the location
385        private void setLocation(List<Step> steps) {
386                List<Location> sublocations = new ArrayList<>();
387                int start = 0, step = 0, oStep = numBefore+numAfter, oMax = this.original.getLength(), pStep = 0, pMax =
388                                (prev == null) ? 0 : prev.getLength();
389                boolean inGap = true;
390
391                // build sublocations: pieces of sequence separated by gaps
392                for (; step < length; step++) {
393                        boolean isGapStep = (steps.get(step) == Step.GAP),
394                                        isGapPrev = (pStep < pMax && prev.isGap(pStep + 1));
395                        if (!isGapStep && !isGapPrev) {
396                                oStep++;
397                                if (inGap) {
398                                        inGap = false;
399                                        start = step + 1;
400                                }
401                        } else if (!inGap) {
402                                inGap = true;
403                                sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED));
404                        }
405                        if (prev != null && !isGapStep) {
406                                pStep++;
407                        }
408                }
409                if (!inGap) {
410                        sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED));
411                }
412
413                // combine sublocations into 1 Location
414                if (sublocations.size() == 0) {
415                        location = null;
416                } else if (sublocations.size() == 1) {
417                        location = sublocations.get(0);
418                } else {
419                        location = new SimpleLocation(sublocations.get(0).getStart(), sublocations.get(sublocations.size() - 1).getEnd(),
420                                        Strand.UNDEFINED,
421                                        false, sublocations);
422                }
423                // TODO handle circular alignments
424
425                // check that alignment has correct number of compounds in it to fit original sequence
426                if (step != length || oStep != oMax || pStep != pMax) {
427                        throw new IllegalArgumentException("Given sequence does not fit in alignment.");
428                }
429        }
430
431        @Override
432        //TODO Needs to implements
433        public SequenceView<C> getInverse() {
434                throw new UnsupportedOperationException("Not supported yet.");
435        }
436
437        @Override
438        public int getNumGapPositions() {
439                if (numGapPositions == -1)
440                        getNumGaps();
441                return numGapPositions;
442        }
443
444        @Override
445        public double getCoverage() {
446
447                double coverage = getLength() - getNumGapPositions();
448                return coverage / getOriginalSequence().getLength();
449        }
450}