001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.symbol;
023
024import java.io.Serializable;
025import java.util.ArrayList;
026import java.util.Collections;
027import java.util.Iterator;
028import java.util.List;
029
030import org.biojava.bio.BioError;
031import org.biojava.utils.AssertionFailure;
032
033/**
034 * This implementation of GappedSymbolList wraps a SymbolList, allowing you to
035 * insert gaps. Please note that this is a view onto another SymbolList. Gaps
036 * created and removed are only in the view not the underlying original. This
037 * means that any gaps present in the original cannot be manipulated in this
038 * view. To manipulate the original you would need to use Edit objects.
039 * 
040 * @author Matthew Pocock
041 * @since 1.3
042 */
043public class SimpleGappedSymbolList extends AbstractSymbolList implements
044                GappedSymbolList, Serializable {
045        /**
046         * An aligned block.
047         * <p>
048         * The alignment is actualy stoored as a list of these objects. Each block
049         * is contiguous with the next in the source fields, but may be gapped in
050         * the view fields.
051         * 
052         * @author Matthew Pocock
053         */
054        protected static final class Block implements Serializable {
055                private static final long serialVersionUID = 4090888450309921439L;
056
057                public int sourceStart;
058                public int sourceEnd;
059                public int viewStart;
060                public int viewEnd;
061
062                public Block(Block block) {
063                        this.sourceStart = block.sourceStart;
064                        this.sourceEnd = block.sourceEnd;
065                        this.viewStart = block.viewStart;
066                        this.viewEnd = block.viewEnd;
067
068                        // fixme: should be using 1.4 assertion syntax
069                        // assert isSane() : "Block is a sane shape: " + this.toString();
070                        assert isSane() : "Block is a sane shape: " + this.toString();
071                }
072
073                public Block(int sourceStart, int sourceEnd, int viewStart, int viewEnd) {
074                        this.sourceStart = sourceStart;
075                        this.sourceEnd = sourceEnd;
076                        this.viewStart = viewStart;
077                        this.viewEnd = viewEnd;
078                }
079
080                public boolean isSane() {
081                        return (viewEnd - viewStart) == (sourceEnd - sourceStart);
082                }
083
084                public String toString() {
085                        return "Block: source=" + sourceStart + "," + sourceEnd + " view="
086                                        + viewStart + "," + viewEnd;
087                }
088        }
089
090        private static final long serialVersionUID = 4258621048300499709L;
091
092        /**
093         * The Alphabet - the same as source but guaranteed to include the gap
094         * character.
095         */
096        private final Alphabet alpha;
097
098        /**
099         * The SymbolList to view.
100         */
101        private final SymbolList source;
102
103        /**
104         * The list of ungapped blocks that align between source and this view.
105         */
106        private final List<Block> blocks;
107
108        /**
109         * The total length of the alignment - necessary to allow leading & trailing
110         * gaps.
111         */
112        private int length;
113
114        /**
115         * Create a new SimpleGappedSymbolList that will view source, inheriting all
116         * existing gaps.
117         * 
118         * @param gappedSource
119         *            the underlying sequence
120         */
121        public SimpleGappedSymbolList(GappedSymbolList gappedSource) {
122                this.source = gappedSource.getSourceSymbolList();
123                this.alpha = this.source.getAlphabet();
124                this.blocks = new ArrayList<Block>();
125                this.length = this.source.length();
126                Block b = new Block(1, length, 1, length);
127                blocks.add(b);
128                int n = 1;
129                for (int i = 1; i <= this.length(); i++)
130                        if (this.alpha.getGapSymbol().equals(gappedSource.symbolAt(i)))
131                                this.addGapInSource(n);
132                        else
133                                n++;
134        }
135
136        /**
137         * Create a new SimpleGappedSymbolList that will view source.
138         * 
139         * @param source
140         *            the underlying sequence
141         */
142        public SimpleGappedSymbolList(SymbolList source) {
143                this.source = source;
144                this.alpha = source.getAlphabet();
145                this.blocks = new ArrayList<Block>();
146                this.length = source.length();
147                Block b = new Block(1, length, 1, length);
148                blocks.add(b);
149        }
150        
151        public SimpleGappedSymbolList(Alphabet alpha) {
152                this.alpha = alpha;
153                this.source = new SimpleSymbolList(this.alpha);
154                this.blocks = new ArrayList<Block>();
155                this.length = source.length();
156                // Block b = new Block(1, length, 1, length);
157                // blocks.add(b);
158        }
159
160        public void addGapInSource(int pos) throws IndexOutOfBoundsException {
161                addGapsInSource(pos, 1);
162        }
163
164        public void addGapInView(int pos) throws IndexOutOfBoundsException {
165                addGapsInView(pos, 1);
166        }
167
168        public void addGapsInSource(int pos, int length) {
169                if (pos < 1 || pos > (length() + 1)) {
170                        throw new IndexOutOfBoundsException(
171                                        "Attempted to add a gap outside of this sequence (1.."
172                                                        + length() + ") at " + pos);
173                }
174
175                int i = blocks.size() / 2;
176                int imin = 0;
177                int imax = blocks.size() - 1;
178
179                do {
180                        Block b = blocks.get(i);
181                        if (b.sourceStart < pos && b.sourceEnd >= pos) { // found block that
182                                // need
183                                // splitting
184                                // with gaps
185                                Block c = new Block(pos, b.sourceEnd, sourceToView(b, pos),
186                                                b.viewEnd);
187                                b.viewEnd = sourceToView(b, pos - 1);
188                                b.sourceEnd = pos - 1;
189                                blocks.add(i + 1, c);
190                                renumber(i + 1, length);
191                                return;
192                        } else {
193                                if (b.sourceStart < pos) {
194                                        imin = i + 1;
195                                        i = imin + (imax - imin) / 2;
196                                } else {
197                                        imax = i - 1;
198                                        i = imin + (imax - imin) / 2;
199                                }
200                        }
201                } while (imin <= imax);
202
203                // extending an already existing run of gaps;
204                if (i < blocks.size()) {
205                        Block b = blocks.get(i);
206                        if (b.sourceStart <= pos) {
207                                i--;
208                        }
209                }
210                renumber(i + 1, length);
211
212                assert isSane() : "Data corrupted: " + blocks;
213        }
214
215        public void addGapsInView(int pos, int length)
216                        throws IndexOutOfBoundsException {
217                if (pos < 1 || pos > (length() + 1)) {
218                        throw new IndexOutOfBoundsException(
219                                        "Attempted to add a gap outside of this sequence (1.."
220                                                        + length() + ") at " + pos);
221                }
222
223                int i = blocks.size() / 2;
224                int imin = 0;
225                int imax = blocks.size() - 1;
226
227                do {
228                        Block b = blocks.get(i);
229                        if (b.viewStart < pos && b.viewEnd >= pos) { // found block that
230                                // need splitting
231                                // with gaps
232                                Block c = new Block(viewToSource(b, pos), b.sourceEnd, pos,
233                                                b.viewEnd);
234                                b.viewEnd = pos - 1;
235                                b.sourceEnd = viewToSource(b, pos - 1);
236                                blocks.add(i + 1, c);
237                                renumber(i + 1, length);
238                                return;
239                        } else {
240                                if (b.viewStart < pos) {
241                                        imin = i + 1;
242                                        i = imin + (imax - imin) / 2;
243                                } else {
244                                        imax = i - 1;
245                                        i = imin + (imax - imin) / 2;
246                                }
247                        }
248                } while (imin <= imax);
249
250                // extending an already existing run of gaps;
251                if (i < blocks.size()) {
252                        Block b = blocks.get(i);
253                        if (pos <= b.viewStart) {
254                                i--;
255                        }
256                } else {
257                        i--;
258                }
259                renumber(i + 1, length);
260        }
261
262        /**
263         * Get list of the un-gapped region of the SymbolList.
264         * <p>
265         * The gapped symbol list can be represented as a list of ungapped regions.
266         * Given a list of start-stop pairs in the ungapped coordinate system each
267         * with a corresponding pair of start-stop pairs in the gapped view, the
268         * entire gapped list can be reconstructed.
269         * 
270         * @return a List of Block instances
271         */
272        public List<Block> BlockIterator() {
273                return Collections.unmodifiableList(blocks);
274        }
275
276        /**
277         * Debugging method
278         */
279        public void dumpBlocks() {
280                for (Iterator<Block> i = blocks.iterator(); i.hasNext();) {
281                        Block b = i.next();
282                        System.out.println(b.sourceStart + ".." + b.sourceEnd + "\t"
283                                        + b.viewStart + ".." + b.viewEnd);
284                }
285        }
286
287        public int firstNonGap() {
288                int first = (blocks.get(0)).viewStart;
289                return first;
290        }
291
292        /**
293         * Translates a Location from the gapped view into the underlying sequence.
294         * End points that are in gaps are moved 'inwards' to shorten the location.
295         * 
296         * @since 1.3
297         */
298
299        public Location gappedToLocation(Location l) {
300                if (l.isContiguous()) {
301                        return gappedToBlock(l);
302                } else {
303                        List<Location> lblocks = new ArrayList<Location>();
304                        for (Iterator<Location> i = l.blockIterator(); i.hasNext();) {
305                                lblocks.add(gappedToBlock((Location) i.next()));
306                        }
307                        return LocationTools.union(lblocks);
308                }
309        }
310
311        public Alphabet getAlphabet() {
312                return alpha;
313        }
314
315        public SymbolList getSourceSymbolList() {
316                return source;
317        }
318
319        public Location getUngappedLocation() {
320                List<Location> locList = new ArrayList<Location>(blocks.size());
321                for (Iterator<Block> i = blocks.iterator(); i.hasNext();) {
322                        Block block = i.next();
323                        locList.add(new RangeLocation(block.viewStart, block.viewEnd));
324                }
325
326                return LocationTools.union(locList);
327        }
328
329        public int lastNonGap() {
330                int last = (blocks.get(blocks.size() - 1)).viewEnd;
331                return last;
332        }
333
334        public int length() {
335                return length;
336        }
337
338        /**
339         * Translate a Location onto the gapped view, splitting blocks if necessary
340         * 
341         * @since 1.3
342         */
343
344        public Location locationToGapped(Location l) {
345                if (l.isContiguous()) {
346                        return blockToGapped(l);
347                } else {
348                        List<Location> lblocks = new ArrayList<Location>();
349                        for (Iterator<Location> i = l.blockIterator(); i.hasNext();)
350                                lblocks.add(blockToGapped(i.next()));
351                        return LocationTools.union(lblocks);
352                }
353        }
354
355        public void removeGap(int pos) throws IndexOutOfBoundsException,
356                        IllegalSymbolException {
357                if (pos < 1 || pos > length()) {
358                        throw new IndexOutOfBoundsException(
359                                        "Attempted to remove gap outside of this sequence (1.."
360                                                        + length() + ") at " + pos);
361                }
362                int i = findViewGap(pos);
363                if (i == -2) {
364                        throw new IllegalSymbolException(
365                                        "Attempted to remove a gap at a non-gap index: " + pos
366                                                        + " -> " + symbolAt(pos).getName());
367                }
368
369                if (i == -1 || i == (blocks.size() - 1)) { // at the beginning or the
370                        // end
371                        renumber(i + 1, -1);
372                } else { // internal
373                        Block l = blocks.get(i);
374                        Block r = blocks.get(i + 1);
375                        renumber(i + 1, -1);
376                        int gap = r.viewStart - l.viewEnd;
377                        if (gap == 1) { // removing the last gap - need to join blocks l & r
378                                // together
379                                l.sourceEnd = r.sourceEnd;
380                                l.viewEnd = r.viewEnd;
381                                blocks.remove(i + 1);
382                        }
383                }
384
385                assert isSane() : "Data corrupted: " + blocks;
386        }
387
388        public void removeGaps(int pos, int length)
389                        throws IndexOutOfBoundsException, IllegalSymbolException {
390                int end = pos + length - 1;
391                if (pos < 1 || length < 1 || end > length()) {
392                        throw new IndexOutOfBoundsException(
393                                        "Attempted to remove gap outside of this sequence (1.."
394                                                        + length() + ") at (" + pos + ".." + end + ")");
395                }
396                int i = findViewGap(pos);
397                if (i == -2) {
398                        throw new IllegalSymbolException(
399                                        "Attempted to remove a gap at a non-gap index: " + pos
400                                                        + " -> " + symbolAt(pos).getName());
401                }
402
403                if (i == -1) { // removing track at the beginning
404                        Block b = blocks.get(0);
405                        if (b.viewStart <= end) {
406                                throw new IllegalSymbolException(
407                                                "Attempted to remove some non-gap characters at ("
408                                                                + pos + ".." + end + ")");
409                        }
410                        renumber(i + 1, -length);
411                } else if (i == (blocks.size() - 1)) { // removing trailing gaps
412                        this.length -= length;
413                } else { // removing internal gaps
414                        Block l = blocks.get(i);
415                        Block r = blocks.get(i + 1);
416                        int gap = r.viewStart - l.viewEnd;
417                        if (gap < length) {
418                                throw new IllegalSymbolException("Removing " + length
419                                                + " gaps from + " + i + " but there are only " + gap
420                                                + " gaps there: " + blocks);
421                        }
422
423                        renumber(i + 1, -length);
424                        if (gap == length) { // deleted an entire gapped region
425                                l.sourceEnd = r.sourceEnd;
426                                l.viewEnd = r.viewEnd;
427                                blocks.remove(i + 1);
428                        }
429                }
430
431                assert isSane() : "Data corrupted: removeGaps(" + pos + "," + length
432                                + ") " + blocks;
433        }
434
435        public final int sourceToView(int indx) throws IndexOutOfBoundsException {
436                if (indx < 1 || indx > source.length()) {
437                        throw new IndexOutOfBoundsException(
438                                        "Attempted to address source sequence (1.." + length()
439                                                        + ") at " + indx);
440                }
441                int j = findSourceBlock(indx);
442                return sourceToView(blocks.get(j), indx);
443        }
444
445        public Symbol symbolAt(int indx) throws IndexOutOfBoundsException {
446                if (indx > length() || indx < 1) {
447                        throw new IndexOutOfBoundsException(
448                                        "Attempted to read outside of this sequence (1.."
449                                                        + length() + ") at " + indx);
450                }
451                int i = findViewBlock(indx);
452                if (i < 0) {
453                        if ((indx < firstNonGap()) || (indx > lastNonGap())) {
454                                return Alphabet.EMPTY_ALPHABET.getGapSymbol();
455                        } else {
456                                return getAlphabet().getGapSymbol();
457                        }
458                } else {
459                        try {
460                                Block b = blocks.get(i);
461                                return source.symbolAt(b.sourceStart - b.viewStart + indx);
462                        } catch (IndexOutOfBoundsException e) {
463                                throw new AssertionFailure(
464                                                "Internal book-keeping error fetching index: " + indx
465                                                                + " of " + length() + " blocks: " + blocks, e);
466                        }
467                }
468        }
469
470        public final int viewToSource(int indx) throws IndexOutOfBoundsException {
471                if (indx < 1 || indx > length()) {
472                        throw new IndexOutOfBoundsException(
473                                        "Attempted to address sequence (1.." + length() + ") at "
474                                                        + indx);
475                }
476                int j = findViewBlock(indx);
477                if (j < 0) {
478                        int nj = -j - 1;
479                        // System.out.println("Converted: " + indx + ":" + j + " to " + nj);
480                        if (nj < blocks.size()) {
481                                Block b = blocks.get(nj);
482                                // System.out.println("Has a following block: " + b);
483                                return -b.sourceStart;
484                        } else {
485                                // System.out.println("Has no following block");
486                                return -(source.length() + 1);
487                        }
488                } else {
489                        return viewToSource(blocks.get(j), indx);
490                }
491        }
492
493        private Location blockToGapped(Location l) {
494                int start = l.getMin();
495                int end = l.getMax();
496
497                List<Location> lblocks = new ArrayList<Location>();
498
499                while (start <= end) {
500                        int containingBlockIdx = findSourceBlock(start);
501                        Block containingBlock = blocks.get(containingBlockIdx);
502                        int gbstart = start - containingBlock.sourceStart
503                                        + containingBlock.viewStart;
504                        int gbend = Math
505                                        .min(end - start + gbstart, containingBlock.viewEnd);
506                        lblocks.add(new RangeLocation(gbstart, gbend));
507                        start = start + gbend - gbstart + 1;
508                }
509
510                return LocationTools.union(lblocks);
511        }
512
513        private Location gappedToBlock(Location l) {
514                int start = l.getMin();
515                int end = l.getMax();
516
517                int startBlockI = findViewBlock(start);
518                int endBlockI = findViewBlock(end);
519
520                if (startBlockI < 0) { // in a gap
521                        int sb = -startBlockI - 1;
522                        if (sb == blocks.size()) {
523                                start = Integer.MAX_VALUE;
524                        } else {
525                                Block startBlock = blocks.get(sb);
526
527                                start = startBlock.sourceStart;
528                        }
529                } else {
530                        Block startBlock = blocks.get(startBlockI);
531                        start = start - startBlock.viewStart + startBlock.sourceStart;
532                }
533
534                if (endBlockI < 0) { // in a gap
535                        int eb = -endBlockI - 1;
536                        if (eb == 0) {
537                                end = Integer.MIN_VALUE;
538                        } else {
539                                Block endBlock = blocks.get(eb - 1);
540
541                                end = endBlock.sourceEnd;
542                        }
543                } else {
544                        Block endBlock = blocks.get(endBlockI);
545                        end = end - endBlock.viewEnd + endBlock.sourceEnd;
546                }
547
548                if (start > end) {
549                        return Location.empty;
550                } else {
551                        return new RangeLocation(start, end);
552                }
553        }
554
555        /**
556         * Finds the index of the block containing the source coordinate indx.
557         * 
558         * @param indx
559         *            the index to find
560         * @return the index of the Block containing indx
561         */
562        protected final int findSourceBlock(int indx) {
563                int i = blocks.size() / 2;
564                int imin = 0;
565                int imax = blocks.size() - 1;
566
567                do {
568                        Block b = blocks.get(i);
569                        if (b.sourceStart <= indx && b.sourceEnd >= indx) {
570                                return i;
571                        } else {
572                                if (b.sourceStart < indx) {
573                                        imin = i + 1;
574                                        i = imin + (imax - imin) / 2;
575                                } else {
576                                        imax = i - 1;
577                                        i = imin + (imax - imin) / 2;
578                                }
579                        }
580                } while (imin <= imax);
581
582                throw new BioError(
583                                "Something is screwed. Could not find source block containing index "
584                                                + indx + " in sequence of length " + source.length());
585        }
586
587        /**
588         * Finds the index of the Block before the gap at indx within the following
589         * gap.
590         * 
591         * @param indx
592         *            the index to find within a gap
593         * @return the index of the block with indx in the gap
594         */
595        protected final int findSourceGap(int indx) {
596                int i = blocks.size() / 2;
597                int imin = 0;
598                int imax = blocks.size() - 1;
599
600                do {
601                        Block b = blocks.get(i);
602                        if (b.sourceStart <= indx && b.sourceEnd >= indx) {
603                                return -1;
604                        } else {
605                                if (b.sourceStart < indx) {
606                                        imin = i + 1;
607                                        i = imin + (imax - imin) / 2;
608                                } else {
609                                        imax = i - 1;
610                                        i = imin + (imax - imin) / 2;
611                                }
612                        }
613                } while (imin <= imax);
614
615                Block b = blocks.get(i);
616                if (b.viewEnd < indx) {
617                        return i;
618                } else {
619                        return i - 1;
620                }
621        }
622
623        /**
624         * Finds the index of the Block containing indx within the view ranges.
625         * <p>
626         * If indx is not within a view block, then it is the index of a gap. The
627         * method will return -(indx+1) where indx is the block emediately following
628         * the gap.
629         * 
630         * @param indx
631         *            the index to find within a view range.
632         * @return the index of the block containing index or one less than the
633         *         negative of the index of the block following the gap
634         */
635        protected final int findViewBlock(int indx) {
636                int i = blocks.size() / 2;
637                int imin = 0;
638                int imax = blocks.size() - 1;
639                // System.out.println("Searching for " + indx);
640
641                Block b;
642                do {
643                        // System.out.println(imin + " < " + i + " < " + imax);
644                        b = blocks.get(i);
645                        // System.out.println("Checking " + b.viewStart + ".." + b.viewEnd);
646                        if (b.viewStart <= indx && b.viewEnd >= indx) {
647                                // System.out.println("hit");
648                                return i;
649                        } else {
650                                if (b.viewStart < indx) {
651                                        // System.out.println("Too low");
652                                        imin = i + 1;
653                                        i = imin + (imax - imin) / 2;
654                                } else {
655                                        // System.out.println("Too high");
656                                        imax = i - 1;
657                                        i = imin + (imax - imin) / 2;
658                                }
659                        }
660                } while (imin <= imax);
661
662                if (i >= blocks.size()) {
663                        return -blocks.size() - 1;
664                }
665
666                if (blocks.get(i) != b) {
667                        if (blocks.get(i - 1) == b) {
668                                --i;
669                        } else if (blocks.get(i + 1) == b) {
670                                ++i;
671                        }
672                }
673
674                // System.out.println("Finding block for: " + indx + " in " + blocks);
675                // System.out.println("\ti=" + i);
676                // System.out.println("\t" + blocks.get(i));
677                if (indx > b.viewStart) { // block before gap - move to block after gap
678                        // System.out.println("\tAdvancing i");
679                        i++;
680                        if (i < blocks.size()) {
681                                // System.out.println("\t" + blocks.get(i));
682                        }
683                }
684                return -i - 1;
685        }
686
687        /**
688         * Finds the index of the Block before the gap at indx within the view
689         * range.
690         * <p>
691         * If indx is in-fact a real symbol, then there will be no Block before it.
692         * In this case, the method returns -2. It returns -1 if indx is within the
693         * leading gaps and blocks.size()-1 if it is within the trailing gaps.
694         * 
695         * @param indx
696         *            the index to find within a view range
697         * @return the index of the block with indx in the following gap
698         */
699        protected final int findViewGap(int indx) {
700                int i = blocks.size() / 2;
701                int imin = 0;
702                int imax = blocks.size() - 1;
703
704                do {
705                        Block b = blocks.get(i);
706                        if (b.viewStart <= indx && b.viewEnd >= indx) {
707                                return -2;
708                        } else {
709                                if (b.viewStart < indx) {
710                                        imin = i + 1;
711                                        i = imin + (imax - imin) / 2;
712                                } else {
713                                        imax = i - 1;
714                                        i = imin + (imax - imin) / 2;
715                                }
716                        }
717                } while (imin <= imax);
718
719                if (i < blocks.size()) {
720                        Block b = blocks.get(i);
721                        if (b.viewEnd < indx) {
722                                return i;
723                        } else {
724                                return i - 1;
725                        }
726                } else {
727                        return i - 1;
728                }
729        }
730
731        protected boolean isSane() {
732                for (Iterator<Block> i = blocks.iterator(); i.hasNext();)
733                        if (!i.next().isSane())
734                                return false;
735                return true;
736        }
737
738        /**
739         * Renumber the view indexes from block, adding delta to each offset.
740         * <p>
741         * This adjusts viewStart and viewEnd to be += delta for each block
742         * i->blocks.size(), and sets the total length to += delta.
743         * 
744         * @param i
745         *            the first
746         */
747        protected final void renumber(int i, int delta) {
748                for (int j = i; j < blocks.size(); j++) {
749                        Block b = blocks.get(j);
750                        b.viewStart += delta;
751                        b.viewEnd += delta;
752                }
753                length += delta;
754        }
755
756        /**
757         * Coordinate conversion from source to view.
758         * 
759         * @param b
760         *            the block containing indx
761         * @param indx
762         *            the index to project
763         * @return the position of indx projected from source to view
764         */
765        protected final int sourceToView(Block b, int indx) {
766                return indx - b.sourceStart + b.viewStart;
767        }
768
769        /**
770         * Coordinate conversion from view to source.
771         * 
772         * @param b
773         *            the block containing indx
774         * @param indx
775         *            the index to project
776         * @return the position of indx projected from view to source
777         */
778        protected final int viewToSource(Block b, int indx) {
779                return indx - b.viewStart + b.sourceStart;
780        }
781}