001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojavax.bio.seq; 023 024import java.util.ArrayList; 025import java.util.Collection; 026import java.util.Iterator; 027import java.util.List; 028 029import org.biojava.bio.symbol.FuzzyLocation; 030import org.biojava.bio.symbol.FuzzyPointLocation; 031import org.biojava.bio.symbol.Location; 032import org.biojava.bio.symbol.MergeLocation; 033import org.biojava.bio.symbol.PointLocation; 034import org.biojava.bio.symbol.RangeLocation; 035import org.biojava.utils.ChangeType; 036import org.biojava.utils.ChangeVetoException; 037import org.biojavax.CrossRef; 038import org.biojavax.CrossReferenceResolver; 039import org.biojavax.RichAnnotatable; 040import org.biojavax.ontology.ComparableTerm; 041 042/** 043 * Describes locations, and adds the concepts of circularity, fuzziness, 044 * annotations, and cross-references to other databases. Also includes strands. 045 * 046 * @author Richard Holland 047 * @author George Waldon - bug fix 048 * @since 1.5 049 */ 050public interface RichLocation extends Location, RichAnnotatable, Comparable { 051 052 public static final ChangeType NOTE = new ChangeType( 053 "This location's notes have changed", 054 "org.biojavax.bio.seq.RichLocation", "NOTE"); 055 public static final ChangeType TERM = new ChangeType( 056 "This location's term has changed", 057 "org.biojavax.bio.seq.RichLocation", "TERM"); 058 public static final ChangeType RANK = new ChangeType( 059 "This location's rank has changed", 060 "org.biojavax.bio.seq.RichLocation", "RANK"); 061 public static final ChangeType CIRCULAR = new ChangeType( 062 "This location's circularity has changed", 063 "org.biojavax.bio.seq.RichLocation", "CIRCULAR"); 064 public static final ChangeType FEATURE = new ChangeType( 065 "This location's parent feature has changed", 066 "org.biojavax.bio.seq.RichLocation", "FEATURE"); 067 068 /** 069 * The empty location matches nothing. 070 */ 071 public static final RichLocation EMPTY_LOCATION = new EmptyRichLocation(); 072 073 /** 074 * Sorts the member locations of a compound location. Does nothing for 075 * non-compound locations. Sorting depends on the compareTo() method of the 076 * member locations - usually they will be sorted by their start position. 077 * This might be useful when used with the location returned by a union or 078 * intersection, or when setting the term of a compound location to 079 * ORDER_TERM. 080 */ 081 public void sort(); 082 083 /** 084 * Retrieves the feature this location is associated with. May be null. 085 * 086 * @return the feature. 087 */ 088 public RichFeature getFeature(); 089 090 /** 091 * Sets the feature this location is associated with. If null, that's fine, 092 * but you won't be able to persist it to the database until you give it a 093 * not-null value. 094 * 095 * @param feature 096 * the feature. 097 */ 098 public void setFeature(RichFeature feature) throws ChangeVetoException; 099 100 /** 101 * Retrieves the crossref associated with this location. 102 * 103 * @return the crossref. 104 */ 105 public CrossRef getCrossRef(); 106 107 /** 108 * Retrieves the term associated with this location. 109 * 110 * @return the term. 111 */ 112 public ComparableTerm getTerm(); 113 114 /** 115 * Sets the term for this location. 116 * 117 * @param term 118 * the term this location should adopt. 119 * @throws ChangeVetoException 120 * in case of error. 121 */ 122 public void setTerm(ComparableTerm term) throws ChangeVetoException; 123 124 /** 125 * Retrieves the strand associated with this location. 126 * 127 * @return the strand. 128 */ 129 public Strand getStrand(); 130 131 /** 132 * Retrieves the rank associated with this location. 133 * 134 * @return the rank. 135 */ 136 public int getRank(); 137 138 /** 139 * Sets the rank for this location. 140 * 141 * @param rank 142 * the rank this location should adopt. 143 * @throws ChangeVetoException 144 * in case of error. 145 */ 146 public void setRank(int rank) throws ChangeVetoException; 147 148 /** 149 * Retrieves the start position of this location. 150 * 151 * @return the position. 152 */ 153 public Position getMinPosition(); 154 155 /** 156 * Retrieves the end position of this location. 157 * 158 * @return the position. 159 */ 160 public Position getMaxPosition(); 161 162 /** 163 * Sets the resolver to use when working out actual base coordinates from 164 * fuzzy positions. 165 * 166 * @param p 167 * the position resolver to use. 168 */ 169 public void setPositionResolver(PositionResolver p); 170 171 /** 172 * Retrieves the circular length of this location. If it is 0, the location 173 * is not circular. If it is not zero, then the number refers to the 174 * wrapping length of the location. eg. 100 would signify that a position of 175 * 112 would actually be a position of 112-100 = 12. 176 * 177 * @return the position. 178 */ 179 public int getCircularLength(); 180 181 /** 182 * Sets the circular length of this location. If it is 0, the location is 183 * not circular. If it is not zero, then the number refers to the wrapping 184 * length of the location. eg. 100 would signify that a position of 112 185 * would actually be a position of 112-100 = 12. 186 * 187 * @param sourceSeqLength 188 * the circular length of this location 189 * @throws ChangeVetoException 190 * if it doesn't want to change. 191 */ 192 public void setCircularLength(int sourceSeqLength) 193 throws ChangeVetoException; 194 195 /** 196 * Sets the cross ref resolver to use when retrieving remote symbols. If 197 * none is given, then the default from 198 * RichObjectFactory.getDefaultCrossRefResolver() is used. 199 * 200 * @param r 201 * the resolver to use. 202 */ 203 public void setCrossRefResolver(CrossReferenceResolver r); 204 205 /** 206 * This class represents a strand on which a location may lie. Three strands 207 * are defined by default - UNKNOWN, NEGATIVE, and POSITIVE. 208 */ 209 public static class Strand implements Comparable { 210 211 private String name; 212 private int value; 213 214 /** 215 * The positive strand is represented by the symbol '+' and has the 216 * number 1. 217 */ 218 public static final Strand POSITIVE_STRAND = new Strand("+", 1); 219 220 /** 221 * The negative strand is represented by the symbol '-' and has the 222 * number -1. 223 */ 224 public static final Strand NEGATIVE_STRAND = new Strand("-", -1); 225 226 /** 227 * The unknown strand is represented by the symbol '?' and has the 228 * number 0. 229 */ 230 public static final Strand UNKNOWN_STRAND = new Strand("?", 0); 231 232 /** 233 * Returns the strand object that matches the number given. Throws an 234 * exception if it could not recognise the number. Number is usually 235 * 1,-1,0. 236 * 237 * @param value 238 * the number of the strand. 239 * @return the strand matching that number. 240 */ 241 public static Strand forValue(int value) { 242 switch (value) { 243 case 1: 244 return POSITIVE_STRAND; 245 case 0: 246 return UNKNOWN_STRAND; 247 case -1: 248 return NEGATIVE_STRAND; 249 default: 250 throw new IllegalArgumentException("Unknown strand type: " 251 + value); 252 } 253 } 254 255 /** 256 * Returns the strand object that matches the symbol given. Throws an 257 * exception if it could not recognise the symbol. Symbol is usually 258 * +,-,?. 259 * 260 * @param name 261 * the symbol of the strand. 262 * @return the strand matching that symbol. 263 */ 264 public static Strand forName(String name) { 265 if (name.equals("+")) 266 return POSITIVE_STRAND; 267 else if (name.equals("?")) 268 return UNKNOWN_STRAND; 269 else if (name.equals(".")) 270 return UNKNOWN_STRAND; 271 else if (name.equals("-")) 272 return NEGATIVE_STRAND; 273 else 274 throw new IllegalArgumentException("Unknown strand type: " 275 + name); 276 } 277 278 // creates a strand with the given number and value 279 private Strand(String name, int value) { 280 this.name = name; 281 this.value = value; 282 } 283 284 /** 285 * Returns the numeric value of this strand. 286 * 287 * @return the numeric value. 288 */ 289 public int intValue() { 290 return this.value; 291 } 292 293 /** 294 * Returns the string symbol of this strand. 295 * 296 * @return the string symbol. 297 */ 298 public String getName() { 299 return this.name; 300 } 301 302 /** 303 * {@inheritDoc} Form: "symbol" (eg. +,-,?) 304 */ 305 public String toString() { 306 return this.name; 307 } 308 309 /** 310 * {@inheritDoc} 311 */ 312 public int hashCode() { 313 int code = 17; 314 code = 31 * code + this.name.hashCode(); 315 code = 31 * code + this.value; 316 return code; 317 } 318 319 /** 320 * {@inheritDoc} Strands are equal if their numbers and symbols match. 321 */ 322 public boolean equals(Object o) { 323 if (!(o instanceof Strand)) 324 return false; 325 if (o == this) 326 return true; 327 Strand them = (Strand) o; 328 if (!them.toString().equals(this.name)) 329 return false; 330 if (them.intValue() != this.value) 331 return false; 332 return true; 333 } 334 335 /** 336 * {@inheritDoc} Strands are compared first by symbol, then by number. 337 */ 338 public int compareTo(Object o) { 339 Strand fo = (Strand) o; 340 if (!this.name.equals(fo.toString())) 341 return this.name.compareTo(fo.toString()); 342 return this.value - fo.intValue(); 343 } 344 } 345 346 /** 347 * Some useful tools for working with Locations. 348 */ 349 public static class Tools { 350 351 // because we are static, we don't want to get instantiated 352 private Tools() { 353 } 354 355 /** 356 * Constructs a RichLocation object based on the given collection of 357 * members. It the collection contains a single location, that is 358 * returned. If it contains multiple locations it returns a 359 * CompoundRichLocation covering them all, with the default term 360 * associated. It returns the empty location if the collection was 361 * empty. 362 * 363 * @param members 364 * the members to construct a location from. 365 * @return the corresponding RichLocation 366 */ 367 public static RichLocation construct(Collection<Location> members) { 368 if (members.size() == 0) 369 return RichLocation.EMPTY_LOCATION; 370 else if (members.size() == 1){ 371 Location loc = members.toArray(new Location[0])[0]; 372 return RichLocation.Tools.enrich(loc); 373 } else if (isMultiSource(members)) 374 return new MultiSourceCompoundRichLocation(members); 375 else 376 return new CompoundRichLocation(members); 377 } 378 379 /** 380 * Returns false if all the locations in the set are from the same 381 * strand of the same sequence. 382 * 383 * @param members 384 * the set of locations to check. 385 * @return true if they are from multiple sources. 386 */ 387 public static boolean isMultiSource(Collection<Location> members) { 388 RichLocation previous = null; 389 for (Iterator<Location> i = members.iterator(); i.hasNext();) { 390 RichLocation rl = enrich(i.next()); 391 if (previous == null) 392 previous = rl; 393 else { 394 if (previous.getCircularLength() != rl.getCircularLength()) 395 return true; 396 if ((previous.getCrossRef() == null && rl.getCrossRef() != null) 397 || (previous.getCrossRef() != null && rl 398 .getCrossRef() == null) 399 || (previous.getCrossRef() != rl.getCrossRef() && !previous 400 .getCrossRef().equals(rl.getCrossRef()))) 401 return true; 402 if ((previous.getStrand() == null && rl.getStrand() != null) 403 || (previous.getStrand() != null && rl.getStrand() == null) 404 || (previous.getStrand() != rl.getStrand() && !previous 405 .getStrand().equals(rl.getStrand()))) 406 return true; 407 } 408 } 409 return false; 410 } 411 412 /** 413 * Takes a set of locations and tries to merge all pairs where the union 414 * operation results in a simple rich location, not a compound one. 415 * 416 * @param members 417 * the members to merge 418 * @return the resulting merged set, which may have only one location in 419 * it. 420 */ 421 public static Collection<Location> merge(Collection<Location> members) { 422 // flatten them out first so we don't end up recursing 423 List<Location> membersList = new ArrayList<Location>( 424 flatten(members)); 425 // all members are now singles so we can use single vs single union 426 // operations 427 if (membersList.size() > 1) { 428 for (int p = 0; p < (membersList.size() - 1); p++) { 429 Location parent = membersList.get(p); 430 for (int c = p + 1; c < membersList.size(); c++) { 431 Location child = membersList.get(c); 432 Location union = parent.union(child); 433 // if parent can merge with child 434 if (union.isContiguous()) { 435 // replace parent with union 436 membersList.set(p, union); 437 // remove child 438 membersList.remove(c); 439 // check all children again 440 p--; 441 break; 442 } 443 } 444 } 445 } 446 return membersList; 447 } 448 449 /** 450 * Takes a location and returns the set of all members. If any members 451 * are compound, it flattens them too. 452 * 453 * @param location 454 * the location to flatten 455 * @return the flattened collection of members. 456 */ 457 public static Collection<Location> flatten(RichLocation location) { 458 List<Location> members = new ArrayList<Location>(); 459 for (Iterator<Location> i = location.blockIterator(); i.hasNext();) 460 members.add(i.next()); 461 return flatten(members); 462 } 463 464 /** 465 * Takes a set of locations and returns the set of all members. If any 466 * members are compound, it flattens them too. 467 * 468 * @param members 469 * the locations to flatten 470 * @return the flattened collection of members. 471 */ 472 public static Collection<Location> flatten(Collection<Location> members) { 473 List<Location> flattened = new ArrayList<Location>(members); 474 for (int i = 0; i < flattened.size(); i++) { 475 Location member = flattened.get(i); 476 if (!member.isContiguous()) { 477 flattened.remove(i); 478 int insertPos = i; 479 for (Iterator<Location> j = member.blockIterator(); j 480 .hasNext();) 481 flattened.add(insertPos++, j.next()); 482 i--; 483 } 484 } 485 return flattened; 486 } 487 488 /** 489 * Takes a start and end position on a circular location of given 490 * length, and shifts them left along the sequence until they sit at the 491 * earliest possible point where they still would represent the same 492 * sequence. 493 * 494 * @param start 495 * the start of the circular location 496 * @param end 497 * the end of the circular location 498 * @param seqLength 499 * the circular length of the sequence underlying the 500 * location 501 * @return an integer array where [0] is the translated start and [1] 502 * the end. 503 */ 504 public static int[] modulateCircularLocation(int start, int end, 505 int seqLength) { 506 // Dummy case for non-circular sequences. 507 if (seqLength == 0) 508 return new int[] { start, end }; 509 // Move the end to after the start. 510 while (end < start) 511 end += seqLength; 512 // Calculate the length. 513 int locationLength = end - start; 514 // Move the start back till it can go no further 515 while (start > seqLength) 516 start -= seqLength; 517 // Move the end back. 518 end = start + locationLength; 519 // Return results. 520 return new int[] { start, end }; 521 } 522 523 /** 524 * Takes two circular locations of given length, and shifts them left 525 * along the sequence until they sit at the earliest possible point 526 * where they still would represent the same sequence. The end result 527 * ensures that simple overlap calculations will always work on the 528 * coordinates returned. 529 * 530 * @param a 531 * the first location to shift 532 * @param b 533 * the second location to shift 534 * @param seqLength 535 * the circular length of the sequence underlying the 536 * location 537 * @return an integer array where [0] is the translated start and [1] 538 * the end of location a, and [2] and [3] are the translated 539 * start and end of location b. 540 */ 541 public static int[] modulateCircularLocationPair(Location a, 542 Location b, int seqLength) { 543 // Dummy case for non-circular locations. 544 if (seqLength == 0) 545 return new int[] { a.getMin(), a.getMax(), b.getMin(), 546 b.getMax() }; 547 // Modulate our start/end to shortest possible equivalent region 548 int[] aParts = modulateCircularLocation(a.getMin(), a.getMax(), 549 seqLength); 550 int aStart = aParts[0]; 551 int aEnd = aParts[1]; 552 // Modulate their start/end to shortest possible equivalent region 553 int[] bParts = modulateCircularLocation(b.getMin(), b.getMax(), 554 seqLength); 555 int bStart = bParts[0]; 556 int bEnd = bParts[1]; 557 // If we wrap and the point we are checking for is before our start, 558 // increment it by circularLength length 559 if (aEnd > seqLength && bStart < aStart) { 560 bStart += seqLength; 561 bEnd += seqLength; 562 } 563 return new int[] { aStart, aEnd, bStart, bEnd }; 564 } 565 566 /** 567 * Takes a point on a circular location and moves it left until it falls 568 * at the earliest possible point that represents the same base. 569 * 570 * @param index 571 * the point on the location to shift 572 * @param seqLength 573 * the size of the circular location 574 * @return the shifted point 575 */ 576 public static int modulateCircularIndex(int index, int seqLength) { 577 // Dummy case 578 if (seqLength == 0) 579 return index; 580 // Modulate 581 while (index > seqLength) 582 index -= seqLength; 583 return index; 584 } 585 586 /** 587 * Attempts to convert a plain Location into a RichLocation. 588 * 589 * @param l 590 * the location to convert 591 * @return the converted location 592 */ 593 public static RichLocation enrich(Location l) { 594 // Dummy case where location is already enriched 595 if (l instanceof RichLocation) { 596 return (RichLocation) l; 597 } 598 // Compound case 599 else if (l instanceof MergeLocation || !l.isContiguous()) { 600 List<Location> members = new ArrayList<Location>(); 601 for (Iterator<Location> i = l.blockIterator(); i.hasNext();) { 602 Location member = i.next(); 603 members.add(enrich(member)); 604 } 605 return RichLocation.Tools.construct(RichLocation.Tools 606 .merge(members)); 607 } 608 // Fuzzy single points 609 else if (l instanceof FuzzyPointLocation) { 610 FuzzyPointLocation f = (FuzzyPointLocation) l; 611 Position pos = new SimplePosition(f.hasBoundedMin(), f 612 .hasBoundedMax(), f.getMin(), f.getMax(), 613 Position.IN_RANGE); 614 return new SimpleRichLocation(pos, 0); // 0 for no rank 615 } 616 // Fuzzy ranges 617 else if (l instanceof FuzzyLocation) { 618 FuzzyLocation f = (FuzzyLocation) l; 619 Position start = new SimplePosition(f.hasBoundedMin(), false, f 620 .getMin()); 621 Position end = new SimplePosition(false, f.hasBoundedMax(), f 622 .getMax()); 623 return new SimpleRichLocation(start, end, 0); // 0 for no rank 624 } 625 // Normal ranges 626 else if (l instanceof RangeLocation) { 627 RangeLocation r = (RangeLocation) l; 628 Position start = new SimplePosition(false, false, r.getMin()); 629 Position end = new SimplePosition(false, false, r.getMax()); 630 return new SimpleRichLocation(start, end, 0); // 0 for no rank 631 } 632 // Normal points 633 else if (l instanceof PointLocation) { 634 PointLocation p = (PointLocation) l; 635 Position pos = new SimplePosition(false, false, p.getMin()); 636 return new SimpleRichLocation(pos, 0); // 0 for no rank 637 } 638 // Empty locations 639 else if (l.toString().equals("{}")) { 640 return EMPTY_LOCATION; 641 } 642 // All other cases 643 else { 644 throw new IllegalArgumentException( 645 "Unable to enrich locations of type " + l.getClass()); 646 } 647 } 648 } 649}