001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.parsers.gff; 022 023 024/** 025 * A location on a sequence. 026 * A location is a contiguous range of indices, with a single start and end point. 027 * <br><br> 028 * Internally, location indices are stored in Java "half-open" format: the start is the (origin 0) index of 029 * the first symbol in the range; the end is the origin 0 index of the first symbol PAST the 030 * end of the range, so that end - start == length. 031 * <br><br> 032 * Location objects, once constructed, cannot be changed. Instead, all methods return a new 033 * location. This allows the use of "method chaining" to implement a particular calculation. 034 * For example, consider the chained statement "loc.prefix( 100 ).suffix( 10 )", 035 * which first applies the prefix method to 036 * the variable named loc, and then the suffix method to the result. 037 * Together, the chained operations create a new location object of length 10 038 * whose start is the index of the 90th symbol. 039 * Here's another example. This one returns a location object holding the coordinates of the intron between 040 * the first exon (location exon1) and 041 * the second exon (location exon2) on a sequence (seq): "seq.prefix( exon2 ).suffix( exon1 )" 042 * <br><br> 043 * About the negative (reverse) strand: The location object stores reverse strand locations as 044 * negative indices. For example, the positive strand location from index 12 to index 97 is 045 * on the opposite side as index -97 (start) to index -12 (end). Note that the larger index is 046 * always downstream from the smaller index, (i.e. start <= end, regardless of strand). 047 * Obviously this representation makes it trivial 048 * to convert a location from one strand to the other. 049 * <br><br> 050 * Additional points regarding the use of locations on opposite strands: 051 *<br> 052 * (1) Opposite strand locations cannot be compared, eg isBefore() will 053 * throw an exception.<br> 054 * (2) Containment queries ( eg overlaps(), contains() ) also throw exceptions. 055 *<br> 056 * (3) The plus() method will map a location to its positive strand equivalent; use it on both args 057 * before calling, for example the intersection() method, 058 * if your code needs to be indifferent to strand. 059 *<br><br> 060 * Exceptions and how they are (typically) used: 061 *<br> 062 * IllegalArgumentException - the location given as a parameter is not on the same strand as the location. 063 *<br> 064 * IndexOutOfBoundsException - often means the operation caused the location to span the origin, ie 065 * be partially on positive and partially on negative strand. 066 *<br> 067 * @author Hanno Hinsch 068 */ 069public class Location implements Iterable<Location> 070{ 071 072 private int mStart; 073 private int mEnd; 074 075 076 /** 077 * Construct new location from coordinates. 078 * See package description of coordinate format. 079 * @param start Origin 0 index of first symbol. 080 * @param end Origin 0 index of last symbol + 1. 081 * @throws IllegalArgumentException End is not after start, or location spans the origin 082 */ 083 public Location( int start, int end ) 084 { 085 mStart= start; 086 mEnd= end; 087 088 if( !isHealthy() ) 089 { 090 throw new IllegalArgumentException( "Improper location parameters: (" + start + "," + end + ")" ); 091 } 092 093 } 094 095 /** 096 * Clone other location. 097 * @param other The location to clone. 098 */ 099 public Location( Location other ) 100 { 101 mStart= other.mStart; 102 mEnd= other.mEnd; 103 104 assert isHealthy(): toString(); 105 } 106 107 public int getBegin(){ 108 if(isNegative()) 109 return mEnd; 110 else 111 return mStart; 112 } 113 114 public int getEnd(){ 115 if(isNegative()) 116 return mStart; 117 else 118 return mEnd; 119 } 120 121 122 /** 123 * Create location from "biocoordinates", as in GFF file. In biocoordinates, 124 * the start index of a range is represented in origin 1 (ie the very first index is 1, not 0), 125 * and end= start + length - 1. 126 * 127 * @param start Origin 1 index of first symbol. 128 * @param end Origin 1 index of last symbol. 129 * @param strand '+' or '-' or '.' ('.' is interpreted as '+'). 130 * @return Created location. 131 * @throws IllegalArgumentException strand must be '+', '-' or '.' 132 */ 133 public static Location fromBio( int start, int end, char strand ) 134 { 135 int s= start - 1; 136 int e= end; 137 138 if( !( strand == '-' || strand == '+' || strand == '.' )) 139 { 140 throw new IllegalArgumentException( "Strand must be '+', '-', or '.'" ); 141 } 142 143 if( strand == '-' ) 144 { 145 //negate 146 s= - end; 147 e= - ( start - 1); 148 } 149 150 return new Location( s, e ); 151 } 152 153 /** 154 * Create a location from MAF file coordinates, which represent negative 155 * strand locations as the distance from the end of the sequence. 156 * 157 * @param start Origin 1 index of first symbol. 158 * @param length Number of symbols in range. 159 * @param strand '+' or '-' or '.' ('.' is interpreted as '+'). 160 * @param totalLength Total number of symbols in sequence. 161 * @throws IllegalArgumentException Strand must be '+', '-', '.' 162 * 163 */ 164 public static Location fromBioExt( int start, int length, char strand, int totalLength ) 165 { 166 int s= start; 167 int e= s + length; 168 169 if( !( strand == '-' || strand == '+' || strand == '.' )) 170 { 171 throw new IllegalArgumentException( "Strand must be '+', '-', or '.'" ); 172 } 173 174 if( strand == '-' ) 175 { 176 s= s - totalLength; 177 e= e - totalLength; 178 } 179 180 return new Location( s, e ); 181 } 182 183 /** 184 * Get character representation of strand. 185 * 186 * @return '+' or '-' 187 */ 188 public char bioStrand() 189 { 190 return ( isNegative() )?'-':'+'; 191 } 192 193 /** 194 * Get start index, in biocoordinates. 195 * 196 * @return The origin 1 index of the first symbol in location. 197 */ 198 public int bioStart() 199 { 200 return plus().start() + 1; 201 } 202 203 /** 204 * Get end index, in biocoordinates. 205 * 206 * @return The origin 1 index of the final symbol in location. 207 */ 208 public int bioEnd() 209 { 210 return plus().end(); 211 } 212 213 214 215 /** 216 * Return location that is in same position on plus strand. If location is already 217 * on plus strand, just return the location unchanged. 218 * 219 * @return Location on plus strand. 220 */ 221 public Location plus() 222 { 223 if( isNegative() ) 224 { 225 return opposite(); 226 } 227 else 228 { 229 return this; 230 } 231 } 232 233 /** 234 * Return location that is in same position on negative strand. If location is already 235 * on negative strand, just return the location unchanged. 236 * 237 * @return Location on negative strand. 238 */ 239 public Location minus() 240 { 241 if( isNegative() ) 242 { 243 return this; 244 } 245 else 246 { 247 return opposite(); 248 } 249 } 250 251 252 /** 253 * Return the union. 254 * <br> 255 * 256 * @param other The location to join. 257 * @return The union is a range that starts at the lesser of the two starting indices and ends at the 258 * greater of the two ends. 259 * @throws IllegalArgumentException Locations are on opposite strands. 260 */ 261 public Location union( Location other ) 262 { 263 264 if( !isSameStrand( other )) 265 { 266 throw new IllegalArgumentException( "Locations are on opposite strands." ); 267 } 268 else 269 { 270 int start= (other.mStart < mStart)? other.mStart: mStart; 271 int end= (other.mEnd > mEnd)? other.mEnd: mEnd; 272 273 return new Location( start, end ); 274 } 275 276 } 277 278 /** 279 * Return the intersection, or null if no overlap. 280 * 281 * @param other 282 * The location to intersect. 283 * @return The maximal location that is contained by both. Returns null if 284 * no overlap! 285 * @throws IllegalArgumentException 286 * Locations are on opposite strands. 287 */ 288 public Location intersection(Location other) { 289 if (isSameStrand(other)) { 290 return intersect(mStart, mEnd, other.mStart, other.mEnd); 291 } else { 292 throw new IllegalArgumentException("Locations are on opposite strands."); 293 } 294 } 295 296 private Location intersect(int a1, int a2, int b1, int b2) { 297 if (a1 > b1) { 298 return intersect(b1, b2, a1, a2); 299 } 300 // Safe to assume a1 <= b1 301 if (b1 >= a2) { 302 // b starts after a ends 303 return null; 304 } else if (b1 < a2 && b2 <= a2) { 305 // b starts after a starts and ends before or at where a ends 306 return new Location(b1, b2); 307 } else if (b1 >= a1 && a2 <= b2) { 308 // b starts after a but extends after the end of a 309 return new Location(b1, a2); 310 } 311 return null; 312 } 313 314 315 /** 316 * Get starting index (origin 0). 317 * 318 * @return The start index. 319 */ 320 public int start() 321 { 322 return mStart; 323 } 324 325 /** 326 * Get the ending index. 327 * 328 * @return The index of last symbol + 1 (remember Java half-open coordinates). 329 */ 330 public int end() 331 { 332 return mEnd; 333 } 334 335 /** 336 * Get length of range. 337 * 338 * @return The length of the range (end - start). 339 */ 340 public int length() 341 { 342 return mEnd - mStart; 343 } 344 345 346 /** 347 * Enable a "sliding window" iteration over a location 348 * to use with Java's "for" loop construct. 349 * The returned helper object implements the Iterable interface; the windowSize and increment semantics are implemented 350 * by an underlying LocIterator. 351 * <br><br> 352 * For example, given a location variable "loc": 353 *<br> 354<pre> 355 //use window size of 3 and increment of +3 356 for( Location temp: loc.window( 3, 3 )) 357 { 358 //at each iteration, temp will be the location of the next 3 symbols 359 } 360</pre> 361 * 362 * @param windowSize The number of symbols to get on each iteration. 363 * @param increment The direction and number of symbols to advance at each iteration. 364 * @return An anonymous iterable object to use with Java's for( ... ) loop construct. 365 */ 366 public Iterable<Location> window( final int windowSize, final int increment ) 367 { 368 final Location loc= this; 369 370 //return iterable anonymous inner class 371 return new Iterable<Location> () 372 { 373 @Override 374 public LocIterator iterator() 375 { 376 return new LocIterator( loc, windowSize, increment ); 377 } 378 379 }; 380 } 381 382 /** 383 * Create a location iterator over this location with a window size of 1 and 384 * an increment of +1 (successive symbols from start to end). 385 * 386 * @return An iterator over a Location (a LocIterator object). 387 */ 388 @Override 389 public LocIterator iterator() 390 { 391 return new LocIterator( this, 1, 1 ); 392 } 393 394 /** 395 * Create a location iterator over this location, 396 * using specified window size and increment. 397 * 398 * @param windowSize The number of symbols to get on each iteration. 399 * @param increment The direction and number of symbols to advance at each iteration. 400 * @return An iterator over a Location (a LocIterator object). 401 */ 402 public LocIterator iterator( int windowSize, int increment ) 403 { 404 return new LocIterator( this, windowSize, increment ); 405 } 406 407 408 /** 409 * The part of this location before the specified position. If position is negative, 410 * count backwards from the end. 411 * <br><br> 412 * For position >= 0, return Location( start, start + position ). 413 * <br> 414 * For position < 0, return Location( start, end + position ). 415 * <br> 416 * @return New location from start of this location to directly before position. 417 * @param position Where the prefix ends. 418 * @throws IndexOutOfBoundsException Specified prefix is longer than location. 419 */ 420 public Location prefix( int position ) 421 { 422 int end; 423 if( position >= 0 ) 424 { 425 if( (mStart + position <= mEnd) ) 426 { 427 end= mStart + position; 428 } 429 else 430 { 431 throw new IndexOutOfBoundsException( "Specified prefix longer than location." ); 432 } 433 } 434 else 435 { 436 if( (mEnd + position > mStart)) 437 { 438 end= mEnd + position; 439 } 440 else 441 { 442 throw new IndexOutOfBoundsException( "Specified prefix longer than location." ); 443 } 444 } 445 446 return new Location( mStart, end ); 447 } 448 449 450 /** 451 * The part of this location after the specified position. If position is negative, count backwards 452 * from the end. 453 * <br><br> 454 * For position >= 0, return Location( start + position, end ). 455 * <br> 456 * For position < 0, return Location( end - position, end ). 457 * <br> 458 * @return New location from position to end of this location. 459 * @param position Where the suffix starts. 460 * @throws IndexOutOfBoundsException Specified suffix is longer than location. 461 */ 462 public Location suffix( int position ) 463 { 464 int start; 465 if( position >= 0 ) 466 { 467 if( mStart + position <= mEnd ) // Scooter willis when 60 + 60 = 120 no remainder 468 { 469 start= mStart + position; 470 } 471 else 472 { 473 throw new IndexOutOfBoundsException( "Specified suffix longer than location." ); 474 } 475 } 476 else 477 { 478 if( mEnd + position >= mStart ) 479 { 480 start= mEnd + position; 481 } 482 else 483 { 484 throw new IndexOutOfBoundsException( "Specified suffix longer than location." ); 485 } 486 } 487 488 return new Location( start, mEnd ); 489 } 490 491 /** 492 * The part of this location before the other location (not inclusive). 493 * 494 * @param other The other location. 495 * @return The part of this location before the other location. 496 * @throws IllegalArgumentException Locations are on opposite strands. 497 * @throws IndexOutOfBoundsException This location does not contain other location. 498 */ 499 public Location prefix( Location other ) 500 { 501 502 if( isSameStrand( other ) ) 503 { 504 if( other.mStart >= mStart ) 505 { 506 return new Location( mStart, (other.mStart < mEnd)? other.mStart: mEnd ); 507 } 508 else 509 { 510 //other is out of bounds -- no prefix 511 throw new IndexOutOfBoundsException( "Specified location not within this location." ); 512 } 513 } 514 else 515 { 516 throw new IllegalArgumentException( "Locations are on opposite strands." ); 517 } 518 } 519 520 /** 521 * The part of this location after the other location (not inclusive). 522 * 523 * @param other The other location. 524 * @return The part of this location after the other location. 525 * @throws IllegalArgumentException Locations are on opposite strands. 526 * @throws IndexOutOfBoundsException This location does not contain other location. 527 */ 528 public Location suffix( Location other ) 529 { 530 if( isSameStrand( other )) 531 { 532 if( other.mEnd <= mEnd ) 533 { 534 return new Location( (other.mEnd > mStart)? other.mEnd: mStart, mEnd ); 535 } 536 else 537 { 538 //other is out of bounds -- no suffix 539 throw new IndexOutOfBoundsException( "Specified location not within this location." ); 540 } 541 } 542 else 543 { 544 throw new IllegalArgumentException( "Locations are on opposite strands." ); 545 } 546 547 } 548 549 /** 550 * Return the adjacent location of specified length directly upstream of this location. 551 * 552 * @return Upstream location. 553 * @param length The length of the upstream location. 554 * @throws IndexOutOfBoundsException Specified length causes crossing of origin. 555 */ 556 public Location upstream( int length ) 557 { 558 if( length < 0 ) 559 { 560 throw new IllegalArgumentException( "Parameter must be >= 0; is=" + length ); 561 } 562 563 if( Math.signum( mStart - length) == Math.signum( mStart ) || 0 == Math.signum( mStart - length ) ) 564 { 565 return new Location(mStart - length, mStart ); 566 } 567 else 568 { 569 throw new IndexOutOfBoundsException( "Specified length causes crossing of origin: " + length + "; " + toString() ); 570 } 571 } 572 573 /** 574 * Return the adjacent location of specified length directly downstream of this location. 575 * 576 * @return The downstream location. 577 * @param length The length of the downstream location. 578 * @throws IndexOutOfBoundsException Specified length causes crossing of origin. 579 */ 580 public Location downstream( int length ) 581 { 582 if( length < 0 ) 583 { 584 throw new IllegalArgumentException( "Parameter must be >= 0; is=" + length ); 585 } 586 587 if( Math.signum( mEnd + length) == Math.signum( mEnd ) || 0 == Math.signum( mEnd + length ) ) 588 { 589 return new Location( mEnd, mEnd + length ); 590 } 591 else 592 { 593 throw new IndexOutOfBoundsException( "Specified length causes crossing of origin: " + length + "; " + toString() ); 594 } 595 596 } 597 598 599 600 /** 601 * Return distance between this location and the other location. 602 * 603 * Distance is defined only if both locations are on same strand. 604 * 605 * @param other The location to compare. 606 * @return The integer distance. Returns -1 if they overlap; 0 if directly adjacent. 607 * @throws IllegalArgumentException Locations are on opposite strands. 608 */ 609 public int distance( Location other ) 610 { 611 if( isSameStrand( other )) 612 { 613 if( overlaps( other )) 614 { 615 return -1; 616 } 617 else 618 { 619 return ( mEnd <= other.mStart )? (other.mStart - mEnd) : (mStart - other.mEnd); 620 } 621 } 622 else 623 { 624 throw new IllegalArgumentException( "Locations are on opposite strands." ); 625 } 626 } 627 628 /** 629 * Return percent overlap of two locations. 630 * 631 * @param other The location to compare. 632 * @return 100.0 * intersection(other).length() / this.length() 633 * @throws IllegalArgumentException Locations are on opposite strands. 634 */ 635 public double percentOverlap( Location other ) 636 { 637 if( length() > 0 && overlaps( other )) 638 { 639 return 100.0 * (((double) intersection( other ).length()) / (double) length()); 640 } 641 else 642 { 643 return 0; 644 } 645 } 646 647 /** 648 * Check if this location and other location overlap. 649 * 650 * @param other The location to compare. 651 * @return True if they overlap. 652 * @throws IllegalArgumentException Locations are on opposite strands. 653 */ 654 public boolean overlaps( Location other ) 655 { 656 if( isSameStrand( other )) 657 { 658 return !( mStart >= other.mEnd || mEnd <= other.mStart ); 659 } 660 else 661 { 662 throw new IllegalArgumentException( "Locations are on opposite strands." ); 663 } 664 } 665 666 /** 667 * Check if this location contains the other. 668 * 669 * @param other The location to compare. 670 * @return True if other is entirely contained by this location. 671 * @throws IllegalArgumentException Locations are on opposite strands. 672 */ 673 public boolean contains( Location other ) 674 { 675 if( isSameStrand( other )) 676 { 677 return ( mStart <= other.mStart && mEnd >= other.mEnd ); 678 } 679 else 680 { 681 throw new IllegalArgumentException( "Locations are on opposite strands." ); 682 } 683 } 684 685 686 /** 687 * Check if this location starts after the other location starts. 688 * The locations may overlap. 689 * 690 * @param other The location to compare. 691 * @return True if this starts after other. 692 * @throws IllegalArgumentException Locations are on opposite strands. 693 */ 694 public boolean startsAfter( Location other ) 695 { 696 if( isSameStrand( other )) 697 { 698 return mStart > other.mStart; 699 } 700 else 701 { 702 throw new IllegalArgumentException( "Locations are on opposite strands." ); 703 } 704 } 705 706 /** 707 * Check if this location starts before other location starts. 708 * The locations may overlap. 709 * 710 * @param other The location to compare. 711 * @return True if this starts before other. 712 * @throws IllegalArgumentException Locations are on opposite strands. 713 */ 714 public boolean startsBefore( Location other ) 715 { 716 if( isSameStrand( other )) 717 { 718 return mStart < other.mStart; 719 } 720 else 721 { 722 throw new IllegalArgumentException( "Locations are on opposite strands." ); 723 } 724 } 725 726 /** 727 * Check if this location ends after other location ends. 728 * The locations may overlap. 729 * 730 * @param other The location to compare. 731 * @return True if location ends after other. 732 * @throws IllegalArgumentException Locations are on opposite strands. 733 */ 734 public boolean endsAfter( Location other ) 735 { 736 if( isSameStrand( other ) ) 737 { 738 return mEnd > other.mEnd; 739 } 740 else 741 { 742 throw new IllegalArgumentException( "Locations are on opposite strands." ); 743 } 744 } 745 746 /** 747 * Check if this location ends before other location ends. 748 * The locations may overlap. 749 * 750 * @param other The location to compare. 751 * @return True if this ends before other. 752 * @throws IllegalArgumentException Locations are on opposite strands. 753 */ 754 public boolean endsBefore( Location other ) 755 { 756 if( isSameStrand( other ) ) 757 { 758 return mEnd < other.mEnd; 759 } 760 else 761 { 762 throw new IllegalArgumentException( "Locations are on opposite strands." ); 763 } 764 } 765 766 /** 767 * Check if this location is entirely after the other location (no overlap). 768 * 769 * @param other The location to compare. 770 * @return True if this is after other. 771 * @throws IllegalArgumentException Locations are on opposite strands. 772 */ 773 public boolean isAfter( Location other ) 774 { 775 if( isSameStrand( other ) ) 776 { 777 return mStart >= other.mEnd; 778 } 779 else 780 { 781 throw new IllegalArgumentException( "Locations are on opposite strands." ); 782 } 783 } 784 785 /** 786 * Check if this location is entirely before other location (no overlap). 787 * 788 * @param other The location to compare. 789 * @return True if this is before other. 790 * @throws IllegalArgumentException Locations are on opposite strands. 791 */ 792 public boolean isBefore( Location other ) 793 { 794 if( isSameStrand( other ) ) 795 { 796 return mEnd <= other.mStart; 797 } 798 else 799 { 800 throw new IllegalArgumentException( "Locations are on opposite strands." ); 801 } 802 } 803 804 /** 805 * Check if location is on negative strand. 806 * Note that Location( 0, 0 ) is by construction defined to be on the 807 * positive strand. 808 * 809 * @return True if on negative (reverse) strand. 810 */ 811 public boolean isNegative() 812 { 813 return ( mStart <= 0 && mEnd <= 0 ); 814 } 815 816 /** 817 * Return location that is in same position on opposite strand. 818 * 819 * @return Location on opposite strand. 820 */ 821 public Location opposite() 822 { 823 return new Location( - mEnd, - mStart ); 824 } 825 826 /** 827 * Check if this location is on same strand as other location. 828 * 829 * @param other The location to compare. 830 * @return True if on same strand. 831 */ 832 public boolean isSameStrand( Location other ) 833 { 834 return ( isNegative() && other.isNegative() ) || ( !isNegative() && !other.isNegative() ); 835 } 836 837 838 /** 839 * Return a string representation of location. 840 * 841 * @return Text string. 842 */ 843 @Override 844 public String toString() 845 { 846 return "[L=" + (mEnd - mStart) + "; S=" + mStart + "; E=" + mEnd +"]"; 847 } 848 849 /* (non-Javadoc) 850 * @see java.lang.Object#hashCode() 851 */ 852 @Override 853 public int hashCode() { 854 final int prime = 31; 855 int result = 1; 856 result = prime * result + mEnd; 857 result = prime * result + mStart; 858 return result; 859 } 860 861 /* (non-Javadoc) 862 * @see java.lang.Object#equals(java.lang.Object) 863 */ 864 @Override 865 public boolean equals(Object obj) { 866 if (this == obj) 867 return true; 868 if (obj == null) 869 return false; 870 if (getClass() != obj.getClass()) 871 return false; 872 Location other = (Location) obj; 873 if (mEnd != other.mEnd) 874 return false; 875 if (mStart != other.mStart) 876 return false; 877 return true; 878 } 879 880 /** 881 * 882 */ 883 private boolean isHealthy() 884 { 885 return ( mStart <= mEnd ) && (( mStart <= 0 && mEnd <= 0 ) || (mStart >= 0 && mEnd >= 0)); 886 } 887 888}