001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 14, 2010
021 * Author: Mark Chapman
022 * @author Paolo Pavan
023 */
024
025package org.biojava.nbio.core.alignment;
026
027import org.biojava.nbio.core.alignment.template.AlignedSequence;
028import org.biojava.nbio.core.sequence.AccessionID;
029import org.biojava.nbio.core.sequence.Strand;
030import org.biojava.nbio.core.sequence.location.SimpleLocation;
031import org.biojava.nbio.core.sequence.location.template.Location;
032import org.biojava.nbio.core.sequence.location.template.Point;
033import org.biojava.nbio.core.sequence.template.*;
034import org.biojava.nbio.core.util.Equals;
035
036import java.io.Serializable;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.Iterator;
040import java.util.List;
041
042/**
043 * Implements a data structure for a {@link Sequence} within an alignment.
044 *
045 * @author Mark Chapman
046 * @param <C> each element of the {@link Sequence} is a {@link Compound} of type C
047 */
048public class SimpleAlignedSequence<S extends Sequence<C>, C extends Compound> implements Serializable, AlignedSequence<S, C> {
049
050
051        private static final long serialVersionUID = 1L;
052
053        private static final String gap = "-";
054
055        // always stored
056        private AlignedSequence<S, C> prev;
057        private S original;
058        private int length, numBefore, numAfter;
059        private Location location;
060
061        // cached (lazily initialized)
062        private int numGaps = -1;
063        private int numGapPositions = -1;
064
065        private int[] alignmentFromSequence;
066        private int[] sequenceFromAlignment;
067
068        /**
069         * Creates an {@link AlignedSequence} for the given {@link Sequence} in a global alignment.
070         *
071         * @param original the original {@link Sequence} before alignment
072         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
073         * @throws IllegalArgumentException if given sequence does not fit in alignment
074         */
075        public SimpleAlignedSequence(S original, List<Step> steps) {
076                this(original, steps, 0, 0);
077        }
078
079        /**
080         * Creates an {@link AlignedSequence} for the given {@link Sequence} in a local alignment.
081         *
082         * @param original the original {@link Sequence} before alignment
083         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
084         * @param numBefore number of {@link Compound}s before a local alignment
085         * @param numAfter number of {@link Compound}s after a local alignment
086         * @throws IllegalArgumentException if given sequence does not fit in alignment
087         */
088        public SimpleAlignedSequence(S original, List<Step> steps, int numBefore, int numAfter) {
089                this.original = original;
090                this.numBefore = numBefore;
091                this.numAfter = numAfter;
092                length = steps.size();
093                setLocation(steps);
094        }
095
096        /**
097         * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a global alignment.
098         *
099         * @param prev the previous {@link AlignedSequence} before this alignment
100         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
101         * @throws IllegalArgumentException if given sequence does not fit in alignment
102         */
103        public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps) {
104                this(prev, steps, 0, 0);
105        }
106
107        /**
108         * Creates a new {@link AlignedSequence} for the given {@link AlignedSequence} in a local alignment.
109         *
110         * @param prev the previous {@link AlignedSequence} before this alignment
111         * @param steps lists whether the sequence aligns a {@link Compound} or gap at each index of the alignment
112         * @param numBefore number of {@link Compound}s before a local alignment
113         * @param numAfter number of {@link Compound}s after a local alignment
114         * @throws IllegalArgumentException if given sequence does not fit in alignment
115         */
116        public SimpleAlignedSequence(AlignedSequence<S, C> prev, List<Step> steps, int numBefore, int numAfter) {
117                this.prev = prev;
118                this.original = prev.getOriginalSequence();
119                this.numBefore = numBefore;
120                this.numAfter = numAfter;
121                if (prev instanceof SimpleAlignedSequence<?, ?>) {
122                        SimpleAlignedSequence<?, ?> p = (SimpleAlignedSequence<?, ?>) prev;
123                        this.numBefore += p.numBefore;
124                        this.numAfter += p.numAfter;
125                }
126                length = steps.size();
127                setLocation(steps);
128        }
129
130        // methods for AlignedSequence
131
132        @Override
133        public void clearCache() {
134                alignmentFromSequence = null;
135                sequenceFromAlignment = null;
136        }
137
138        private void setAlignmentFromSequence() {
139                alignmentFromSequence = new int[original.getLength()];
140                int s = 1, a = 1;
141                for (int i = 0; i < numBefore; i++, s++) {
142                        alignmentFromSequence[s - 1] = a;
143                }
144                for (; s <= alignmentFromSequence.length && a <= length; s++, a++) {
145                        while (a <= length && isGap(a)) {
146                                a++;
147                        }
148                        alignmentFromSequence[s - 1] = a;
149                }
150                a--;
151                for (int i = 0; i < numAfter; i++, s++) {
152                        alignmentFromSequence[s - 1] = a;
153                }
154        }
155
156        @Override
157        public int[] getAlignmentFromSequence() {
158                if (alignmentFromSequence == null)
159                        setAlignmentFromSequence();
160                return alignmentFromSequence;
161        }
162
163        @Override
164        public int getAlignmentIndexAt(int sequenceIndex) {
165                if (alignmentFromSequence == null)
166                        setAlignmentFromSequence();
167                return alignmentFromSequence[sequenceIndex - 1];
168        }
169
170        @Override
171        public Point getEnd() {
172                return location.getEnd();
173        }
174
175        @Override
176        public Location getLocationInAlignment() {
177                return location;
178        }
179
180        @Override
181        public int getNumGaps() {
182                if (numGaps == -1) {
183                        numGaps = 0;
184                        numGapPositions = 0;
185                        C cGap = getCompoundSet().getCompoundForString(gap);
186                        boolean inGap = false;
187                        for (C compound : getAsList()) {
188                                if (compound == null || compound.equalsIgnoreCase(cGap)) {
189                                        if (!inGap) {
190                                                numGaps++;
191                                                inGap = true;
192                                        }
193                                        numGapPositions++;
194                                } else {
195                                        inGap = false;
196                                }
197                        }
198                }
199                return numGaps;
200        }
201
202        @Override
203        public S getOriginalSequence() {
204                return original;
205        }
206
207        @Override
208        public int getOverlapCount() {
209                // TODO handle circular alignments
210                return 1;
211        }
212
213        private void setSequenceFromAlignment() {
214                sequenceFromAlignment = new int[length];
215                int a = 1, s = numBefore + 1;
216                for (int i = 0; i < getStart().getPosition(); i++, a++) {
217                        sequenceFromAlignment[a - 1] = s;
218                }
219                for (; a <= length; a++) {
220                        if (!isGap(a)) {
221                                s++;
222                        }
223                        sequenceFromAlignment[a - 1] = s;
224                }
225        }
226
227        @Override
228        public int[] getSequenceFromAlignment() {
229                if (sequenceFromAlignment == null)
230                        setSequenceFromAlignment();
231                return sequenceFromAlignment;
232        }
233
234        @Override
235        public int getSequenceIndexAt(int alignmentIndex) {
236                if (sequenceFromAlignment == null)
237                        setSequenceFromAlignment();
238                return sequenceFromAlignment[alignmentIndex - 1];
239        }
240
241        @Override
242        public Point getStart() {
243                return location.getStart();
244        }
245
246        @Override
247        public boolean isCircular() {
248                return location.isCircular();
249        }
250
251        @Override
252        public boolean isGap(int alignmentIndex) {
253                if (getStart().getPosition() <= alignmentIndex && alignmentIndex <= getEnd().getPosition()) {
254                        if (!location.isComplex()) {
255                                return false;
256                        }
257                        for (Location sublocation : location) {
258                                if (sublocation.getStart().getPosition() <= alignmentIndex &&
259                                                alignmentIndex <= sublocation.getEnd().getPosition()) {
260                                        return false;
261                                }
262                        }
263                }
264                return true;
265        }
266
267        // methods for Sequence
268
269        @Override
270        public int countCompounds(C... compounds) {
271                int count = 0;
272                List<C> search = Arrays.asList(compounds);
273                for (C compound : getAsList()) {
274                        if (search.contains(compound)) {
275                                count++;
276                        }
277                }
278                return count;
279        }
280
281        @Override
282        public AccessionID getAccession() {
283                return original.getAccession();
284        }
285
286        @Override
287        public List<C> getAsList() {
288                List<C> compounds = new ArrayList<C>();
289                for (int i = 1; i <= length; i++) {
290                        compounds.add(getCompoundAt(i));
291                }
292                return compounds;
293        }
294
295        @Override
296        public boolean equals(Object o){
297
298                if(! Equals.classEqual(this, o)) {
299                        return false;
300                }
301
302                Sequence<C> other = (Sequence<C>)o;
303                if ( original.getAsList().size() != other.getAsList().size())
304                        return false;
305
306                for ( int i = 0 ; i< original.getAsList().size() ; i++){
307                        if ( ! original.getAsList().get(i).equalsIgnoreCase(other.getAsList().get(i)))
308                                return false;
309                }
310                return true;
311        }
312
313        @Override
314        public int hashCode(){
315                String s = getSequenceAsString();
316                return s.hashCode();
317        }
318
319        @Override
320        public C getCompoundAt(int alignmentIndex) {
321                return alignmentIndex >= 1 && alignmentIndex <= length && isGap(alignmentIndex) ?
322                                getCompoundSet().getCompoundForString(gap) :
323                                original.getCompoundAt(getSequenceIndexAt(alignmentIndex));
324        }
325
326        @Override
327        public CompoundSet<C> getCompoundSet() {
328                return original.getCompoundSet();
329        }
330
331        @Override
332        public int getIndexOf(C compound) {
333                for (int i = 1; i <= length; i++) {
334                        if (compound.equals(getCompoundAt(i))) {
335                                return i;
336                        }
337                }
338                return -1;
339        }
340
341        @Override
342        public int getLastIndexOf(C compound) {
343                for (int i = length; i >= 1; i--) {
344                        if (compound.equals(getCompoundAt(i))) {
345                                return i;
346                        }
347                }
348                return -1;
349        }
350
351        @Override
352        public int getLength() {
353                return length;
354        }
355
356        @Override
357        public String getSequenceAsString() {
358                return SequenceMixin.toString(this);
359        }
360
361        @Override
362        public SequenceView<C> getSubSequence(Integer start, Integer end) {
363                return SequenceMixin.createSubSequence(this, start, end);
364        }
365
366        // method for Iterable
367
368        @Override
369        public Iterator<C> iterator() {
370                return getAsList().iterator();
371        }
372
373        // method from Object
374
375        /**
376         * Provides standard Java language access to results of {@link #getSequenceAsString()}.
377         */
378        @Override
379        public String toString() {
380                return getSequenceAsString();
381        }
382
383        // helper method to initialize the location
384        private void setLocation(List<Step> steps) {
385                List<Location> sublocations = new ArrayList<Location>();
386                int start = 0, step = 0, oStep = numBefore+numAfter, oMax = this.original.getLength(), pStep = 0, pMax =
387                                (prev == null) ? 0 : prev.getLength();
388                boolean inGap = true;
389
390                // build sublocations: pieces of sequence separated by gaps
391                for (; step < length; step++) {
392                        boolean isGapStep = (steps.get(step) == Step.GAP),
393                                        isGapPrev = (pStep < pMax && prev.isGap(pStep + 1));
394                        if (!isGapStep && !isGapPrev) {
395                                oStep++;
396                                if (inGap) {
397                                        inGap = false;
398                                        start = step + 1;
399                                }
400                        } else if (!inGap) {
401                                inGap = true;
402                                sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED));
403                        }
404                        if (prev != null && !isGapStep) {
405                                pStep++;
406                        }
407                }
408                if (!inGap) {
409                        sublocations.add(new SimpleLocation(start, step, Strand.UNDEFINED));
410                }
411
412                // combine sublocations into 1 Location
413                if (sublocations.size() == 0) {
414                        location = null;
415                } else if (sublocations.size() == 1) {
416                        location = sublocations.get(0);
417                } else {
418                        location = new SimpleLocation(sublocations.get(0).getStart(), sublocations.get(sublocations.size() - 1).getEnd(),
419                                        Strand.UNDEFINED,
420                                        false, sublocations);
421                }
422                // TODO handle circular alignments
423
424                // check that alignment has correct number of compounds in it to fit original sequence
425                if (step != length || oStep != oMax || pStep != pMax) {
426                        throw new IllegalArgumentException("Given sequence does not fit in alignment.");
427                }
428        }
429
430        @Override
431        //TODO Needs to implements
432        public SequenceView<C> getInverse() {
433                throw new UnsupportedOperationException("Not supported yet.");
434        }
435
436        @Override
437        public int getNumGapPositions() {
438                if (numGapPositions == -1)
439                        getNumGaps();
440                return numGapPositions;
441        }
442
443        @Override
444        public double getCoverage() {
445
446                double coverage = getLength() - getNumGapPositions();
447                return coverage / getOriginalSequence().getLength();
448        }
449}