001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023
024/**
025 * A location on a sequence.
026 * A location is a contiguous range of indices, with a single start and end point.
027 * <br><br>
028 * Internally, location indices are stored in Java "half-open" format: the start is the (origin 0) index of
029 * the first symbol in the range; the end is the origin 0 index of the first symbol PAST the
030 * end of the range, so that end - start == length.
031 * <br><br>
032 * Location objects, once constructed, cannot be changed. Instead, all methods return a new
033 * location. This allows the use of "method chaining" to implement a particular calculation.
034 * For example, consider the chained statement "loc.prefix( 100 ).suffix( 10 )",
035 * which first applies the prefix method to
036 * the variable named loc, and then the suffix method to the result.
037 * Together, the chained operations create a new location object of length 10
038 * whose start is the index of the 90th symbol.
039 * Here's another example. This one returns a location object holding the coordinates of the intron between
040 * the first exon (location exon1) and
041 * the second exon (location exon2) on a sequence (seq): "seq.prefix( exon2 ).suffix( exon1 )"
042 * <br><br>
043 * About the negative (reverse) strand: The location object stores reverse strand locations as
044 * negative indices. For example, the positive strand location from index 12 to index 97 is
045 * on the opposite side as index -97 (start) to index -12 (end). Note that the larger index is
046 * always downstream from the smaller index, (i.e. start &lt;= end, regardless of strand).
047 * Obviously this representation makes it trivial
048 * to convert a location from one strand to the other.
049 * <br><br>
050 * Additional points regarding the use of locations on opposite strands:
051 *<br>
052 * (1) Opposite strand locations cannot be compared, eg isBefore() will
053 * throw an exception.<br>
054 * (2) Containment queries ( eg overlaps(), contains() ) also throw exceptions.
055 *<br>
056 * (3) The plus() method will map a location to its positive strand equivalent; use it on both args
057 * before calling, for example the intersection() method,
058 * if your code needs to be indifferent to strand.
059 *<br><br>
060 * Exceptions and how they are (typically) used:
061 *<br>
062 * IllegalArgumentException - the location given as a parameter is not on the same strand as the location.
063 *<br>
064 * IndexOutOfBoundsException - often means the operation caused the location to span the origin, ie
065 * be partially on positive and partially on negative strand.
066 *<br>
067 * @author Hanno Hinsch
068 */
069public class Location implements Iterable<Location>
070{
071
072        private int mStart;
073        private int mEnd;
074
075
076        /**
077         * Construct new location from coordinates.
078         * See package description of coordinate format.
079         * @param start Origin 0 index of first symbol.
080         * @param end Origin 0 index of last symbol + 1.
081         * @throws IllegalArgumentException End is not after start, or location spans the origin
082         */
083        public Location( int start, int end )
084        {
085                mStart= start;
086                mEnd= end;
087
088                if( !isHealthy() )
089                {
090                        throw new IllegalArgumentException( "Improper location parameters: (" + start + "," + end + ")" );
091                }
092
093        }
094
095        /**
096         * Clone other location.
097         * @param other The location to clone.
098         */
099        public Location( Location other )
100        {
101                mStart= other.mStart;
102                mEnd= other.mEnd;
103
104                assert isHealthy(): toString();
105        }
106
107                public int getBegin(){
108                        if(isNegative())
109                                return mEnd;
110                        else
111                                return mStart;
112                }
113
114                public int getEnd(){
115                        if(isNegative())
116                                return mStart;
117                        else
118                                return mEnd;
119                }
120
121
122        /**
123         * Create location from "biocoordinates", as in GFF file. In biocoordinates,
124         * the start index of a range is represented in origin 1 (ie the very first index is 1, not 0),
125         * and end= start + length - 1.
126         *
127         * @param start Origin 1 index of first symbol.
128         * @param end Origin 1 index of last symbol.
129         * @param strand '+' or '-' or '.' ('.' is interpreted as '+').
130         * @return Created location.
131         * @throws IllegalArgumentException strand must be '+', '-' or '.'
132         */
133        public static Location fromBio( int start, int end, char strand )
134        {
135                int s= start - 1;
136                int e= end;
137
138                if( !( strand == '-' || strand == '+' || strand == '.' ))
139                {
140                        throw new IllegalArgumentException( "Strand must be '+', '-', or '.'" );
141                }
142
143                if( strand == '-' )
144                {
145                        //negate
146                        s= - end;
147                        e= - ( start - 1);
148                }
149
150                return new Location( s, e );
151        }
152
153        /**
154         * Create a location from MAF file coordinates, which represent negative
155         * strand locations as the distance from the end of the sequence.
156         *
157         * @param start Origin 1 index of first symbol.
158         * @param length Number of symbols in range.
159         * @param strand '+' or '-' or '.' ('.' is interpreted as '+').
160         * @param totalLength Total number of symbols in sequence.
161         * @throws IllegalArgumentException Strand must be '+', '-', '.'
162         *
163         */
164         public static Location fromBioExt( int start, int length, char strand, int totalLength )
165         {
166                int s= start;
167                int e= s + length;
168
169                if( !( strand == '-' || strand == '+' || strand == '.' ))
170                {
171                        throw new IllegalArgumentException( "Strand must be '+', '-', or '.'" );
172                }
173
174                if( strand == '-' )
175                {
176                        s= s - totalLength;
177                        e= e - totalLength;
178                }
179
180                return new Location( s, e );
181         }
182
183        /**
184         * Get character representation of strand.
185         *
186         * @return '+' or '-'
187         */
188        public char bioStrand()
189        {
190                return ( isNegative() )?'-':'+';
191        }
192
193        /**
194         * Get start index, in biocoordinates.
195         *
196         * @return The origin 1 index of the first symbol in location.
197         */
198        public int bioStart()
199        {
200                return plus().start() + 1;
201        }
202
203        /**
204         * Get end index, in biocoordinates.
205         *
206         * @return The origin 1 index of the final symbol in location.
207         */
208        public int bioEnd()
209        {
210                return plus().end();
211        }
212
213
214
215        /**
216         * Return location that is in same position on plus strand. If location is already
217         * on plus strand, just return the location unchanged.
218         *
219         * @return Location on plus strand.
220         */
221        public Location plus()
222        {
223                if( isNegative() )
224                {
225                        return opposite();
226                }
227                else
228                {
229                        return this;
230                }
231        }
232
233        /**
234         * Return location that is in same position on negative strand. If location is already
235         * on negative strand, just return the location unchanged.
236         *
237         * @return Location on negative strand.
238         */
239        public Location minus()
240        {
241                if( isNegative() )
242                {
243                        return this;
244                }
245                else
246                {
247                        return opposite();
248                }
249        }
250
251
252        /**
253        *  Return the union.
254        * <br>
255        *
256         * @param other The location to join.
257         * @return The union is a range that starts at the lesser of the two starting indices and ends at the
258         * greater of the two ends.
259         * @throws IllegalArgumentException Locations are on opposite strands.
260        */
261        public Location union( Location other )
262        {
263
264                if( !isSameStrand( other ))
265                {
266                        throw new IllegalArgumentException( "Locations are on opposite strands." );
267                }
268                else
269                {
270                        int start= (other.mStart < mStart)? other.mStart: mStart;
271                        int end= (other.mEnd > mEnd)? other.mEnd: mEnd;
272
273                        return new Location( start, end );
274                }
275
276        }
277
278        /**
279         * Return the intersection, or null if no overlap.
280         *
281         * @param other
282         *            The location to intersect.
283         * @return The maximal location that is contained by both. Returns null if
284         *         no overlap!
285         * @throws IllegalArgumentException
286         *             Locations are on opposite strands.
287         */
288        public Location intersection(Location other) {
289                if (isSameStrand(other)) {
290                        return intersect(mStart, mEnd, other.mStart, other.mEnd);
291                } else {
292                        throw new IllegalArgumentException("Locations are on opposite strands.");
293                }
294        }
295
296        private Location intersect(int a1, int a2, int b1, int b2) {
297                if (a1 > b1) {
298                        return intersect(b1, b2, a1, a2);
299                }
300                // Safe to assume a1 <= b1
301                if (b1 >= a2) {
302                        // b starts after a ends
303                        return null;
304                } else if (b1 < a2 && b2 <= a2) {
305                        // b starts after a starts and ends before or at where a ends
306                        return new Location(b1, b2);
307                } else if (b1 >= a1 && a2 <= b2) {
308                        // b starts after a but extends after the end of a
309                        return new Location(b1, a2);
310                }
311                return null;
312        }
313
314
315        /**
316         * Get starting index (origin 0).
317         *
318         * @return The start index.
319         */
320        public int start()
321        {
322                return mStart;
323        }
324
325        /**
326         * Get the ending index.
327         *
328         * @return The index of last symbol + 1 (remember Java half-open coordinates).
329         */
330        public int end()
331        {
332                return mEnd;
333        }
334
335        /**
336         * Get length of range.
337         *
338         * @return The length of the range (end - start).
339         */
340        public int length()
341        {
342                return mEnd - mStart;
343        }
344
345
346        /**
347         * Enable a "sliding window" iteration over a location
348         * to use with Java's "for" loop construct.
349         * The returned helper object implements the Iterable interface; the windowSize and increment semantics are implemented
350         * by an underlying LocIterator.
351         * <br><br>
352         * For example, given a location variable "loc":
353         *<br>
354<pre>
355        //use window size of 3 and increment of +3
356        for( Location temp: loc.window( 3, 3 ))
357        {
358        //at each iteration, temp will be the location of the next 3 symbols
359        }
360</pre>
361         *
362         * @param windowSize The number of symbols to get on each iteration.
363         * @param increment The direction and number of symbols to advance at each iteration.
364         * @return An anonymous iterable object to use with Java's for( ... ) loop construct.
365         */
366        public Iterable<Location> window( final int windowSize, final int increment )
367        {
368                final Location loc= this;
369
370                //return iterable anonymous inner class
371                return new Iterable<Location> ()
372                        {
373                                @Override
374                                public LocIterator iterator()
375                                {
376                                        return new LocIterator( loc, windowSize, increment );
377                                }
378
379                        };
380        }
381
382        /**
383         * Create a location iterator over this location with a window size of 1 and
384         * an increment of +1 (successive symbols from start to end).
385         *
386         * @return An iterator over a Location (a LocIterator object).
387         */
388        @Override
389        public LocIterator iterator()
390        {
391                return new LocIterator( this, 1, 1 );
392        }
393
394        /**
395         * Create a location iterator over this location,
396         * using specified window size and increment.
397         *
398         * @param windowSize The number of symbols to get on each iteration.
399         * @param increment The direction and number of symbols to advance at each iteration.
400         * @return An iterator over a Location (a LocIterator object).
401         */
402        public LocIterator iterator( int windowSize, int increment )
403        {
404                return new LocIterator( this, windowSize, increment );
405        }
406
407
408        /**
409         * The part of this location before the specified position. If position is negative,
410         * count backwards from the end.
411         * <br><br>
412         * For position &gt;= 0, return Location( start, start + position ).
413         * <br>
414         * For position &lt; 0, return Location( start, end + position ).
415         * <br>
416         * @return New location from start of this location to directly before position.
417         * @param position Where the prefix ends.
418         * @throws IndexOutOfBoundsException Specified prefix is longer than location.
419         */
420        public Location prefix( int position )
421        {
422                int end;
423                if( position >= 0 )
424                {
425                        if( (mStart + position <= mEnd) )
426                        {
427                                end= mStart + position;
428                        }
429                        else
430                        {
431                                throw new IndexOutOfBoundsException( "Specified prefix longer than location." );
432                        }
433                }
434                else
435                {
436                        if( (mEnd + position > mStart))
437                        {
438                                end= mEnd + position;
439                        }
440                        else
441                        {
442                                throw new IndexOutOfBoundsException( "Specified prefix longer than location." );
443                        }
444                }
445
446                return new Location( mStart, end );
447        }
448
449
450        /**
451         * The part of this location after the specified position. If position is negative, count backwards
452         * from the end.
453         * <br><br>
454         * For position &gt;= 0, return Location( start + position, end ).
455         * <br>
456         * For position &lt; 0, return Location( end - position, end ).
457         * <br>
458         * @return New location from position to end of this location.
459         * @param position Where the suffix starts.
460         * @throws IndexOutOfBoundsException Specified suffix is longer than location.
461         */
462        public Location suffix( int position )
463        {
464                int start;
465                if( position >= 0 )
466                {
467                        if( mStart + position <= mEnd ) // Scooter willis when 60 + 60 = 120 no remainder
468                        {
469                                start= mStart + position;
470                        }
471                        else
472                        {
473                                throw new IndexOutOfBoundsException( "Specified suffix longer than location." );
474                        }
475                }
476                else
477                {
478                        if( mEnd + position >= mStart )
479                        {
480                                start= mEnd + position;
481                        }
482                        else
483                        {
484                                throw new IndexOutOfBoundsException( "Specified suffix longer than location." );
485                        }
486                }
487
488                return new Location( start, mEnd );
489        }
490
491        /**
492         * The part of this location before the other location (not inclusive).
493         *
494         * @param other The other location.
495         * @return The part of this location before the other location.
496         * @throws IllegalArgumentException Locations are on opposite strands.
497         * @throws IndexOutOfBoundsException This location does not contain other location.
498         */
499        public Location prefix( Location other )
500        {
501
502                if( isSameStrand( other ) )
503                {
504                        if( other.mStart >= mStart )
505                        {
506                                return new Location( mStart, (other.mStart < mEnd)? other.mStart: mEnd );
507                        }
508                        else
509                        {
510                                //other is out of bounds -- no prefix
511                                throw new IndexOutOfBoundsException( "Specified location not within this location." );
512                        }
513                }
514                else
515                {
516                        throw new IllegalArgumentException( "Locations are on opposite strands." );
517                }
518        }
519
520        /**
521         * The part of this location after the other location (not inclusive).
522         *
523         * @param other The other location.
524         * @return The part of this location after the other location.
525         * @throws IllegalArgumentException Locations are on opposite strands.
526         * @throws IndexOutOfBoundsException This location does not contain other location.
527         */
528        public Location suffix( Location other )
529        {
530                if( isSameStrand( other ))
531                {
532                        if( other.mEnd <= mEnd )
533                        {
534                                return new Location( (other.mEnd > mStart)? other.mEnd: mStart, mEnd );
535                        }
536                        else
537                        {
538                                //other is out of bounds -- no suffix
539                                throw new IndexOutOfBoundsException( "Specified location not within this location." );
540                        }
541                }
542                else
543                {
544                        throw new IllegalArgumentException( "Locations are on opposite strands." );
545                }
546
547        }
548
549        /**
550         * Return the adjacent location of specified length directly upstream of this location.
551         *
552         * @return Upstream location.
553         * @param length The length of the upstream location.
554         * @throws IndexOutOfBoundsException Specified length causes crossing of origin.
555         */
556        public Location upstream( int length )
557        {
558                if( length < 0 )
559                {
560                        throw new IllegalArgumentException( "Parameter must be >= 0; is=" + length );
561                }
562
563                if( Math.signum( mStart - length) == Math.signum( mStart ) || 0 == Math.signum( mStart - length ) )
564                {
565                        return new Location(mStart - length, mStart );
566                }
567                else
568                {
569                        throw new IndexOutOfBoundsException( "Specified length causes crossing of origin: " + length + "; " + toString() );
570                }
571        }
572
573        /**
574         * Return the adjacent location of specified length directly downstream of this location.
575         *
576         * @return The downstream location.
577         * @param length The length of the downstream location.
578         * @throws IndexOutOfBoundsException Specified length causes crossing of origin.
579         */
580        public Location downstream( int length )
581        {
582                if( length < 0 )
583                {
584                        throw new IllegalArgumentException( "Parameter must be >= 0; is=" + length );
585                }
586
587                if( Math.signum( mEnd + length) == Math.signum( mEnd ) || 0 == Math.signum( mEnd + length ) )
588                {
589                        return new Location( mEnd, mEnd + length );
590                }
591                else
592                {
593                        throw new IndexOutOfBoundsException( "Specified length causes crossing of origin: " + length + "; " + toString() );
594                }
595
596        }
597
598
599
600        /**
601        *   Return distance between this location and the other location.
602        *
603        *       Distance is defined only if both locations are on same strand.
604         *
605         * @param other The location to compare.
606         * @return The integer distance. Returns -1 if they overlap; 0 if directly adjacent.
607         * @throws IllegalArgumentException Locations are on opposite strands.
608         */
609        public int distance( Location other )
610        {
611                if( isSameStrand( other ))
612                {
613                        if( overlaps( other ))
614                        {
615                                return -1;
616                        }
617                        else
618                        {
619                                return ( mEnd <= other.mStart )? (other.mStart - mEnd) : (mStart - other.mEnd);
620                        }
621                }
622                else
623                {
624                        throw new IllegalArgumentException( "Locations are on opposite strands." );
625                }
626        }
627
628        /**
629         * Return percent overlap of two locations.
630         *
631         * @param other The location to compare.
632         * @return 100.0 * intersection(other).length() / this.length()
633         * @throws IllegalArgumentException Locations are on opposite strands.
634         */
635        public double percentOverlap( Location other )
636        {
637                if( length() > 0 && overlaps( other ))
638                {
639                        return 100.0 * (((double) intersection( other ).length()) / (double) length());
640                }
641                else
642                {
643                        return 0;
644                }
645        }
646
647        /**
648         * Check if this location and other location overlap.
649         *
650         * @param other The location to compare.
651         * @return True if they overlap.
652         * @throws IllegalArgumentException Locations are on opposite strands.
653         */
654        public boolean overlaps( Location other )
655        {
656                if( isSameStrand( other ))
657                {
658                        return !( mStart >= other.mEnd || mEnd <= other.mStart );
659                }
660                else
661                {
662                        throw new IllegalArgumentException( "Locations are on opposite strands." );
663                }
664        }
665
666        /**
667         * Check if this location contains the other.
668         *
669         * @param other The location to compare.
670         * @return True if other is entirely contained by this location.
671         * @throws IllegalArgumentException Locations are on opposite strands.
672         */
673        public boolean contains( Location other )
674        {
675                if( isSameStrand( other ))
676                {
677                        return ( mStart <= other.mStart && mEnd >= other.mEnd );
678                }
679                else
680                {
681                        throw new IllegalArgumentException( "Locations are on opposite strands." );
682                }
683        }
684
685
686        /**
687         * Check if this location starts after the other location starts.
688         * The locations may overlap.
689         *
690         * @param other The location to compare.
691         * @return True if this starts after other.
692         * @throws IllegalArgumentException Locations are on opposite strands.
693         */
694        public boolean startsAfter( Location other )
695        {
696                if( isSameStrand( other ))
697                {
698                        return mStart > other.mStart;
699                }
700                else
701                {
702                        throw new IllegalArgumentException( "Locations are on opposite strands." );
703                }
704        }
705
706        /**
707         * Check if this location starts before other location starts.
708         * The locations may overlap.
709         *
710         * @param other The location to compare.
711         * @return True if this starts before other.
712         * @throws IllegalArgumentException Locations are on opposite strands.
713         */
714        public boolean startsBefore( Location other )
715        {
716                if( isSameStrand( other ))
717                {
718                        return mStart < other.mStart;
719                }
720                else
721                {
722                        throw new IllegalArgumentException( "Locations are on opposite strands." );
723                }
724        }
725
726        /**
727         * Check if this location ends after other location ends.
728         * The locations may overlap.
729         *
730         * @param other The location to compare.
731         * @return True if location ends after other.
732         * @throws IllegalArgumentException Locations are on opposite strands.
733         */
734        public boolean endsAfter( Location other )
735        {
736                if( isSameStrand( other ) )
737                {
738                        return mEnd > other.mEnd;
739                }
740                else
741                {
742                        throw new IllegalArgumentException( "Locations are on opposite strands." );
743                }
744        }
745
746        /**
747         * Check if this location ends before other location ends.
748         * The locations may overlap.
749         *
750         * @param other The location to compare.
751         * @return True if this ends before other.
752         * @throws IllegalArgumentException Locations are on opposite strands.
753         */
754        public boolean endsBefore( Location other )
755        {
756                if( isSameStrand( other ) )
757                {
758                        return mEnd < other.mEnd;
759                }
760                else
761                {
762                        throw new IllegalArgumentException( "Locations are on opposite strands." );
763                }
764        }
765
766        /**
767         * Check if this location is entirely after the other location (no overlap).
768         *
769         * @param other The location to compare.
770         * @return True if this is after other.
771         * @throws IllegalArgumentException Locations are on opposite strands.
772         */
773        public boolean isAfter( Location other )
774        {
775                if( isSameStrand( other ) )
776                {
777                        return mStart >= other.mEnd;
778                }
779                else
780                {
781                        throw new IllegalArgumentException( "Locations are on opposite strands." );
782                }
783        }
784
785        /**
786         * Check if this location is entirely before other location (no overlap).
787         *
788         * @param other The location to compare.
789         * @return True if this is before other.
790         * @throws IllegalArgumentException Locations are on opposite strands.
791         */
792        public boolean isBefore( Location other )
793        {
794                if( isSameStrand( other ) )
795                {
796                        return mEnd <= other.mStart;
797                }
798                else
799                {
800                        throw new IllegalArgumentException( "Locations are on opposite strands." );
801                }
802        }
803
804        /**
805         * Check if location is on negative strand.
806         * Note that Location( 0, 0 ) is by construction defined to be on the
807         * positive strand.
808         *
809         * @return True if on negative (reverse) strand.
810         */
811        public boolean isNegative()
812        {
813                return ( mStart <= 0 && mEnd <= 0 );
814        }
815
816        /**
817         * Return location that is in same position on opposite strand.
818         *
819         * @return Location on opposite strand.
820         */
821        public Location opposite()
822        {
823                return new Location( - mEnd, - mStart );
824        }
825
826        /**
827         * Check if this location is on same strand as other location.
828         *
829         * @param other The location to compare.
830         * @return True if on same strand.
831         */
832        public boolean isSameStrand( Location other )
833        {
834                return ( isNegative() && other.isNegative() ) || ( !isNegative() && !other.isNegative() );
835        }
836
837
838        /**
839         * Return a string representation of location.
840         *
841         * @return Text string.
842         */
843        @Override
844        public String toString()
845        {
846                return "[L=" + (mEnd - mStart) + "; S=" + mStart + "; E=" + mEnd +"]";
847        }
848
849        /* (non-Javadoc)
850         * @see java.lang.Object#hashCode()
851         */
852        @Override
853        public int hashCode() {
854                final int prime = 31;
855                int result = 1;
856                result = prime * result + mEnd;
857                result = prime * result + mStart;
858                return result;
859        }
860
861        /* (non-Javadoc)
862         * @see java.lang.Object#equals(java.lang.Object)
863         */
864        @Override
865        public boolean equals(Object obj) {
866                if (this == obj)
867                        return true;
868                if (obj == null)
869                        return false;
870                if (getClass() != obj.getClass())
871                        return false;
872                Location other = (Location) obj;
873                if (mEnd != other.mEnd)
874                        return false;
875                if (mStart != other.mStart)
876                        return false;
877                return true;
878        }
879
880        /**
881         *
882         */
883        private boolean isHealthy()
884        {
885                return ( mStart <= mEnd ) && (( mStart <= 0 && mEnd <= 0 ) || (mStart >= 0 && mEnd >= 0));
886        }
887
888}