001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.bio.seq;
023
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.Iterator;
027import java.util.List;
028
029import org.biojava.bio.symbol.FuzzyLocation;
030import org.biojava.bio.symbol.FuzzyPointLocation;
031import org.biojava.bio.symbol.Location;
032import org.biojava.bio.symbol.MergeLocation;
033import org.biojava.bio.symbol.PointLocation;
034import org.biojava.bio.symbol.RangeLocation;
035import org.biojava.utils.ChangeType;
036import org.biojava.utils.ChangeVetoException;
037import org.biojavax.CrossRef;
038import org.biojavax.CrossReferenceResolver;
039import org.biojavax.RichAnnotatable;
040import org.biojavax.ontology.ComparableTerm;
041
042/**
043 * Describes locations, and adds the concepts of circularity, fuzziness,
044 * annotations, and cross-references to other databases. Also includes strands.
045 * 
046 * @author Richard Holland
047 * @author George Waldon - bug fix
048 * @since 1.5
049 */
050public interface RichLocation extends Location, RichAnnotatable, Comparable {
051
052        public static final ChangeType NOTE = new ChangeType(
053                        "This location's notes have changed",
054                        "org.biojavax.bio.seq.RichLocation", "NOTE");
055        public static final ChangeType TERM = new ChangeType(
056                        "This location's term has changed",
057                        "org.biojavax.bio.seq.RichLocation", "TERM");
058        public static final ChangeType RANK = new ChangeType(
059                        "This location's rank has changed",
060                        "org.biojavax.bio.seq.RichLocation", "RANK");
061        public static final ChangeType CIRCULAR = new ChangeType(
062                        "This location's circularity has changed",
063                        "org.biojavax.bio.seq.RichLocation", "CIRCULAR");
064        public static final ChangeType FEATURE = new ChangeType(
065                        "This location's parent feature has changed",
066                        "org.biojavax.bio.seq.RichLocation", "FEATURE");
067
068        /**
069         * The empty location matches nothing.
070         */
071        public static final RichLocation EMPTY_LOCATION = new EmptyRichLocation();
072
073        /**
074         * Sorts the member locations of a compound location. Does nothing for
075         * non-compound locations. Sorting depends on the compareTo() method of the
076         * member locations - usually they will be sorted by their start position.
077         * This might be useful when used with the location returned by a union or
078         * intersection, or when setting the term of a compound location to
079         * ORDER_TERM.
080         */
081        public void sort();
082
083        /**
084         * Retrieves the feature this location is associated with. May be null.
085         * 
086         * @return the feature.
087         */
088        public RichFeature getFeature();
089
090        /**
091         * Sets the feature this location is associated with. If null, that's fine,
092         * but you won't be able to persist it to the database until you give it a
093         * not-null value.
094         * 
095         * @param feature
096         *            the feature.
097         */
098        public void setFeature(RichFeature feature) throws ChangeVetoException;
099
100        /**
101         * Retrieves the crossref associated with this location.
102         * 
103         * @return the crossref.
104         */
105        public CrossRef getCrossRef();
106
107        /**
108         * Retrieves the term associated with this location.
109         * 
110         * @return the term.
111         */
112        public ComparableTerm getTerm();
113
114        /**
115         * Sets the term for this location.
116         * 
117         * @param term
118         *            the term this location should adopt.
119         * @throws ChangeVetoException
120         *             in case of error.
121         */
122        public void setTerm(ComparableTerm term) throws ChangeVetoException;
123
124        /**
125         * Retrieves the strand associated with this location.
126         * 
127         * @return the strand.
128         */
129        public Strand getStrand();
130
131        /**
132         * Retrieves the rank associated with this location.
133         * 
134         * @return the rank.
135         */
136        public int getRank();
137
138        /**
139         * Sets the rank for this location.
140         * 
141         * @param rank
142         *            the rank this location should adopt.
143         * @throws ChangeVetoException
144         *             in case of error.
145         */
146        public void setRank(int rank) throws ChangeVetoException;
147
148        /**
149         * Retrieves the start position of this location.
150         * 
151         * @return the position.
152         */
153        public Position getMinPosition();
154
155        /**
156         * Retrieves the end position of this location.
157         * 
158         * @return the position.
159         */
160        public Position getMaxPosition();
161
162        /**
163         * Sets the resolver to use when working out actual base coordinates from
164         * fuzzy positions.
165         * 
166         * @param p
167         *            the position resolver to use.
168         */
169        public void setPositionResolver(PositionResolver p);
170
171        /**
172         * Retrieves the circular length of this location. If it is 0, the location
173         * is not circular. If it is not zero, then the number refers to the
174         * wrapping length of the location. eg. 100 would signify that a position of
175         * 112 would actually be a position of 112-100 = 12.
176         * 
177         * @return the position.
178         */
179        public int getCircularLength();
180
181        /**
182         * Sets the circular length of this location. If it is 0, the location is
183         * not circular. If it is not zero, then the number refers to the wrapping
184         * length of the location. eg. 100 would signify that a position of 112
185         * would actually be a position of 112-100 = 12.
186         * 
187         * @param sourceSeqLength
188         *            the circular length of this location
189         * @throws ChangeVetoException
190         *             if it doesn't want to change.
191         */
192        public void setCircularLength(int sourceSeqLength)
193                        throws ChangeVetoException;
194
195        /**
196         * Sets the cross ref resolver to use when retrieving remote symbols. If
197         * none is given, then the default from
198         * RichObjectFactory.getDefaultCrossRefResolver() is used.
199         * 
200         * @param r
201         *            the resolver to use.
202         */
203        public void setCrossRefResolver(CrossReferenceResolver r);
204
205        /**
206         * This class represents a strand on which a location may lie. Three strands
207         * are defined by default - UNKNOWN, NEGATIVE, and POSITIVE.
208         */
209        public static class Strand implements Comparable {
210
211                private String name;
212                private int value;
213
214                /**
215                 * The positive strand is represented by the symbol '+' and has the
216                 * number 1.
217                 */
218                public static final Strand POSITIVE_STRAND = new Strand("+", 1);
219
220                /**
221                 * The negative strand is represented by the symbol '-' and has the
222                 * number -1.
223                 */
224                public static final Strand NEGATIVE_STRAND = new Strand("-", -1);
225
226                /**
227                 * The unknown strand is represented by the symbol '?' and has the
228                 * number 0.
229                 */
230                public static final Strand UNKNOWN_STRAND = new Strand("?", 0);
231
232                /**
233                 * Returns the strand object that matches the number given. Throws an
234                 * exception if it could not recognise the number. Number is usually
235                 * 1,-1,0.
236                 * 
237                 * @param value
238                 *            the number of the strand.
239                 * @return the strand matching that number.
240                 */
241                public static Strand forValue(int value) {
242                        switch (value) {
243                        case 1:
244                                return POSITIVE_STRAND;
245                        case 0:
246                                return UNKNOWN_STRAND;
247                        case -1:
248                                return NEGATIVE_STRAND;
249                        default:
250                                throw new IllegalArgumentException("Unknown strand type: "
251                                                + value);
252                        }
253                }
254
255                /**
256                 * Returns the strand object that matches the symbol given. Throws an
257                 * exception if it could not recognise the symbol. Symbol is usually
258                 * +,-,?.
259                 * 
260                 * @param name
261                 *            the symbol of the strand.
262                 * @return the strand matching that symbol.
263                 */
264                public static Strand forName(String name) {
265                        if (name.equals("+"))
266                                return POSITIVE_STRAND;
267                        else if (name.equals("?"))
268                                return UNKNOWN_STRAND;
269                        else if (name.equals("."))
270                                return UNKNOWN_STRAND;
271                        else if (name.equals("-"))
272                                return NEGATIVE_STRAND;
273                        else
274                                throw new IllegalArgumentException("Unknown strand type: "
275                                                + name);
276                }
277
278                // creates a strand with the given number and value
279                private Strand(String name, int value) {
280                        this.name = name;
281                        this.value = value;
282                }
283
284                /**
285                 * Returns the numeric value of this strand.
286                 * 
287                 * @return the numeric value.
288                 */
289                public int intValue() {
290                        return this.value;
291                }
292
293                /**
294                 * Returns the string symbol of this strand.
295                 * 
296                 * @return the string symbol.
297                 */
298                public String getName() {
299                        return this.name;
300                }
301
302                /**
303                 * {@inheritDoc} Form: "symbol" (eg. +,-,?)
304                 */
305                public String toString() {
306                        return this.name;
307                }
308
309                /**
310                 * {@inheritDoc}
311                 */
312                public int hashCode() {
313                        int code = 17;
314                        code = 31 * code + this.name.hashCode();
315                        code = 31 * code + this.value;
316                        return code;
317                }
318
319                /**
320                 * {@inheritDoc} Strands are equal if their numbers and symbols match.
321                 */
322                public boolean equals(Object o) {
323                        if (!(o instanceof Strand))
324                                return false;
325                        if (o == this)
326                                return true;
327                        Strand them = (Strand) o;
328                        if (!them.toString().equals(this.name))
329                                return false;
330                        if (them.intValue() != this.value)
331                                return false;
332                        return true;
333                }
334
335                /**
336                 * {@inheritDoc} Strands are compared first by symbol, then by number.
337                 */
338                public int compareTo(Object o) {
339                        Strand fo = (Strand) o;
340                        if (!this.name.equals(fo.toString()))
341                                return this.name.compareTo(fo.toString());
342                        return this.value - fo.intValue();
343                }
344        }
345
346        /**
347         * Some useful tools for working with Locations.
348         */
349        public static class Tools {
350
351                // because we are static, we don't want to get instantiated
352                private Tools() {
353                }
354
355                /**
356                 * Constructs a RichLocation object based on the given collection of
357                 * members. It the collection contains a single location, that is
358                 * returned. If it contains multiple locations it returns a
359                 * CompoundRichLocation covering them all, with the default term
360                 * associated. It returns the empty location if the collection was
361                 * empty.
362                 * 
363                 * @param members
364                 *            the members to construct a location from.
365                 * @return the corresponding RichLocation
366                 */
367                public static RichLocation construct(Collection<Location> members) {
368                        if (members.size() == 0)
369                                return RichLocation.EMPTY_LOCATION;
370                        else if (members.size() == 1){
371                            Location loc =  members.toArray(new Location[0])[0];
372                            return RichLocation.Tools.enrich(loc);
373                        } else if (isMultiSource(members))
374                                return new MultiSourceCompoundRichLocation(members);
375                        else
376                                return new CompoundRichLocation(members);
377                }
378
379                /**
380                 * Returns false if all the locations in the set are from the same
381                 * strand of the same sequence.
382                 * 
383                 * @param members
384                 *            the set of locations to check.
385                 * @return true if they are from multiple sources.
386                 */
387                public static boolean isMultiSource(Collection<Location> members) {
388                        RichLocation previous = null;
389                        for (Iterator<Location> i = members.iterator(); i.hasNext();) {
390                                RichLocation rl = enrich(i.next());
391                                if (previous == null)
392                                        previous = rl;
393                                else {
394                                        if (previous.getCircularLength() != rl.getCircularLength())
395                                                return true;
396                                        if ((previous.getCrossRef() == null && rl.getCrossRef() != null)
397                                                        || (previous.getCrossRef() != null && rl
398                                                                        .getCrossRef() == null)
399                                                        || (previous.getCrossRef() != rl.getCrossRef() && !previous
400                                                                        .getCrossRef().equals(rl.getCrossRef())))
401                                                return true;
402                                        if ((previous.getStrand() == null && rl.getStrand() != null)
403                                                        || (previous.getStrand() != null && rl.getStrand() == null)
404                                                        || (previous.getStrand() != rl.getStrand() && !previous
405                                                                        .getStrand().equals(rl.getStrand())))
406                                                return true;
407                                }
408                        }
409                        return false;
410                }
411
412                /**
413                 * Takes a set of locations and tries to merge all pairs where the union
414                 * operation results in a simple rich location, not a compound one.
415                 * 
416                 * @param members
417                 *            the members to merge
418                 * @return the resulting merged set, which may have only one location in
419                 *         it.
420                 */
421                public static Collection<Location> merge(Collection<Location> members) {
422                        // flatten them out first so we don't end up recursing
423                        List<Location> membersList = new ArrayList<Location>(
424                                        flatten(members));
425                        // all members are now singles so we can use single vs single union
426                        // operations
427                        if (membersList.size() > 1) {
428                                for (int p = 0; p < (membersList.size() - 1); p++) {
429                                        Location parent = membersList.get(p);
430                                        for (int c = p + 1; c < membersList.size(); c++) {
431                                                Location child = membersList.get(c);
432                                                Location union = parent.union(child);
433                                                // if parent can merge with child
434                                                if (union.isContiguous()) {
435                                                        // replace parent with union
436                                                        membersList.set(p, union);
437                                                        // remove child
438                                                        membersList.remove(c);
439                                                        // check all children again
440                                                        p--;
441                                                        break;
442                                                }
443                                        }
444                                }
445                        }
446                        return membersList;
447                }
448
449                /**
450                 * Takes a location and returns the set of all members. If any members
451                 * are compound, it flattens them too.
452                 * 
453                 * @param location
454                 *            the location to flatten
455                 * @return the flattened collection of members.
456                 */
457                public static Collection<Location> flatten(RichLocation location) {
458                        List<Location> members = new ArrayList<Location>();
459                        for (Iterator<Location> i = location.blockIterator(); i.hasNext();)
460                                members.add(i.next());
461                        return flatten(members);
462                }
463
464                /**
465                 * Takes a set of locations and returns the set of all members. If any
466                 * members are compound, it flattens them too.
467                 * 
468                 * @param members
469                 *            the locations to flatten
470                 * @return the flattened collection of members.
471                 */
472                public static Collection<Location> flatten(Collection<Location> members) {
473                        List<Location> flattened = new ArrayList<Location>(members);
474                        for (int i = 0; i < flattened.size(); i++) {
475                                Location member = flattened.get(i);
476                                if (!member.isContiguous()) {
477                                        flattened.remove(i);
478                                        int insertPos = i;
479                                        for (Iterator<Location> j = member.blockIterator(); j
480                                                        .hasNext();)
481                                                flattened.add(insertPos++, j.next());
482                                        i--;
483                                }
484                        }
485                        return flattened;
486                }
487
488                /**
489                 * Takes a start and end position on a circular location of given
490                 * length, and shifts them left along the sequence until they sit at the
491                 * earliest possible point where they still would represent the same
492                 * sequence.
493                 * 
494                 * @param start
495                 *            the start of the circular location
496                 * @param end
497                 *            the end of the circular location
498                 * @param seqLength
499                 *            the circular length of the sequence underlying the
500                 *            location
501                 * @return an integer array where [0] is the translated start and [1]
502                 *         the end.
503                 */
504                public static int[] modulateCircularLocation(int start, int end,
505                                int seqLength) {
506                        // Dummy case for non-circular sequences.
507                        if (seqLength == 0)
508                                return new int[] { start, end };
509                        // Move the end to after the start.
510                        while (end < start)
511                                end += seqLength;
512                        // Calculate the length.
513                        int locationLength = end - start;
514                        // Move the start back till it can go no further
515                        while (start > seqLength)
516                                start -= seqLength;
517                        // Move the end back.
518                        end = start + locationLength;
519                        // Return results.
520                        return new int[] { start, end };
521                }
522
523                /**
524                 * Takes two circular locations of given length, and shifts them left
525                 * along the sequence until they sit at the earliest possible point
526                 * where they still would represent the same sequence. The end result
527                 * ensures that simple overlap calculations will always work on the
528                 * coordinates returned.
529                 * 
530                 * @param a
531                 *            the first location to shift
532                 * @param b
533                 *            the second location to shift
534                 * @param seqLength
535                 *            the circular length of the sequence underlying the
536                 *            location
537                 * @return an integer array where [0] is the translated start and [1]
538                 *         the end of location a, and [2] and [3] are the translated
539                 *         start and end of location b.
540                 */
541                public static int[] modulateCircularLocationPair(Location a,
542                                Location b, int seqLength) {
543                        // Dummy case for non-circular locations.
544                        if (seqLength == 0)
545                                return new int[] { a.getMin(), a.getMax(), b.getMin(),
546                                                b.getMax() };
547                        // Modulate our start/end to shortest possible equivalent region
548                        int[] aParts = modulateCircularLocation(a.getMin(), a.getMax(),
549                                        seqLength);
550                        int aStart = aParts[0];
551                        int aEnd = aParts[1];
552                        // Modulate their start/end to shortest possible equivalent region
553                        int[] bParts = modulateCircularLocation(b.getMin(), b.getMax(),
554                                        seqLength);
555                        int bStart = bParts[0];
556                        int bEnd = bParts[1];
557                        // If we wrap and the point we are checking for is before our start,
558                        // increment it by circularLength length
559                        if (aEnd > seqLength && bStart < aStart) {
560                                bStart += seqLength;
561                                bEnd += seqLength;
562                        }
563                        return new int[] { aStart, aEnd, bStart, bEnd };
564                }
565
566                /**
567                 * Takes a point on a circular location and moves it left until it falls
568                 * at the earliest possible point that represents the same base.
569                 * 
570                 * @param index
571                 *            the point on the location to shift
572                 * @param seqLength
573                 *            the size of the circular location
574                 * @return the shifted point
575                 */
576                public static int modulateCircularIndex(int index, int seqLength) {
577                        // Dummy case
578                        if (seqLength == 0)
579                                return index;
580                        // Modulate
581                        while (index > seqLength)
582                                index -= seqLength;
583                        return index;
584                }
585
586                /**
587                 * Attempts to convert a plain Location into a RichLocation.
588                 * 
589                 * @param l
590                 *            the location to convert
591                 * @return the converted location
592                 */
593                public static RichLocation enrich(Location l) {
594                        // Dummy case where location is already enriched
595                        if (l instanceof RichLocation) {
596                                return (RichLocation) l;
597                        }
598                        // Compound case
599                        else if (l instanceof MergeLocation || !l.isContiguous()) {
600                                List<Location> members = new ArrayList<Location>();
601                                for (Iterator<Location> i = l.blockIterator(); i.hasNext();) {
602                                        Location member = i.next();
603                                        members.add(enrich(member));
604                                }
605                                return RichLocation.Tools.construct(RichLocation.Tools
606                                                .merge(members));
607                        }
608                        // Fuzzy single points
609                        else if (l instanceof FuzzyPointLocation) {
610                                FuzzyPointLocation f = (FuzzyPointLocation) l;
611                                Position pos = new SimplePosition(f.hasBoundedMin(), f
612                                                .hasBoundedMax(), f.getMin(), f.getMax(),
613                                                Position.IN_RANGE);
614                                return new SimpleRichLocation(pos, 0); // 0 for no rank
615                        }
616                        // Fuzzy ranges
617                        else if (l instanceof FuzzyLocation) {
618                                FuzzyLocation f = (FuzzyLocation) l;
619                                Position start = new SimplePosition(f.hasBoundedMin(), false, f
620                                                .getMin());
621                                Position end = new SimplePosition(false, f.hasBoundedMax(), f
622                                                .getMax());
623                                return new SimpleRichLocation(start, end, 0); // 0 for no rank
624                        }
625                        // Normal ranges
626                        else if (l instanceof RangeLocation) {
627                                RangeLocation r = (RangeLocation) l;
628                                Position start = new SimplePosition(false, false, r.getMin());
629                                Position end = new SimplePosition(false, false, r.getMax());
630                                return new SimpleRichLocation(start, end, 0); // 0 for no rank
631                        }
632                        // Normal points
633                        else if (l instanceof PointLocation) {
634                                PointLocation p = (PointLocation) l;
635                                Position pos = new SimplePosition(false, false, p.getMin());
636                                return new SimpleRichLocation(pos, 0); // 0 for no rank
637                        }
638                        // Empty locations
639                        else if (l.toString().equals("{}")) {
640                                return EMPTY_LOCATION;
641                        }
642                        // All other cases
643                        else {
644                                throw new IllegalArgumentException(
645                                                "Unable to enrich locations of type " + l.getClass());
646                        }
647                }
648        }
649}