001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.location.template;
023
024import org.biojava.nbio.core.sequence.Strand;
025import org.biojava.nbio.core.sequence.location.SimpleLocation;
026import org.biojava.nbio.core.sequence.location.SimplePoint;
027import org.biojava.nbio.core.sequence.template.Accessioned;
028import org.biojava.nbio.core.sequence.template.Compound;
029import org.biojava.nbio.core.sequence.template.Sequence;
030
031import java.util.ArrayList;
032import java.util.List;
033
034/**
035 * Sets of integers used to represent the location of features on sequence. A
036 * location can be a single set of bounds or composed of multiple
037 * sub-locations. Each sub-location is a Location and therefore subject to the
038 * same rules.
039 *
040 * @author ayates
041 */
042public interface Location extends Iterable<Location>, Accessioned {
043
044        /**
045         * Basic location which is set to the minimum and maximum bounds of
046         * {@link Integer}. {@link Strand} is set to {@link Strand#UNDEFINED}.
047         */
048        public static final Location EMPTY =
049                        new SimpleLocation(Integer.MIN_VALUE, Integer.MAX_VALUE, Strand.UNDEFINED);
050
051        /**
052         * Start of the location
053         */
054        Point getStart();
055
056        /**
057         * End of the location
058         */
059        Point getEnd();
060
061        /**
062         * Returns the length of the outer bounds of this location
063         */
064        int getLength();
065
066        /**
067         * Strand which the location is located on
068         */
069        Strand getStrand();
070
071        /**
072         * Gives access to the sub locations for this location. However this does
073         * not return sub-locations of sub-locations. For that functionality use
074         * {@link #getAllSubLocations()}.
075         *
076         * @return A list of a single level of sub-locations
077         */
078        List<Location> getSubLocations();
079
080        /**
081         * An extension to {@link #getSubLocations()} which returns sub-locations
082         * of sub-locations; this will continue until it runs out of those locations.
083         *
084         * @return List of all sub locations including sub-locations of sub locations
085         */
086        List<Location> getRelevantSubLocations();
087
088        /**
089         * Returns true if the location is considered to be complex meaning
090         * the location is actually composed of sub-locations.
091         */
092        boolean isComplex();
093
094        /**
095         * Indicates if this location is circular.
096         */
097        boolean isCircular();
098
099        /**
100         * Returns true if the position is meant to represent a point between
101         * two points such as 78^79. Only valid if start and stop are next to
102         * each other.
103         */
104        boolean isBetweenCompounds();
105
106        /**
107         * Will return a SequenceReader object which represents the outer bounds
108         * of this Location
109         *
110         * @param &lt;C&gt; The type of compound to use
111         * @param sequence The sequence object to work with
112         * @return The sequence
113         */
114        <C extends Compound> Sequence<C> getSubSequence(Sequence<C> sequence);
115
116        /**
117         * Will return a SequenceReader object which offers a view of all resolved
118         * locations i.e. those locations which are not complex and define the
119         * true Sequence represented
120         *
121         * @param &lt;C&gt; The type of compound to use
122         * @param sequence The sequence object to work with
123         * @return The full assembled sequence
124         */
125        <C extends Compound> Sequence<C> getRelevantSubSequence(Sequence<C> sequence);
126
127        /**
128         * Helper methods for use with the Location classes. Taking its
129         * inspiration from the RichSequence.Tools class from the old BioJava
130         */
131        public static class Tools {
132
133                /**
134                 * Used for building a location from a series of sub-locations
135                 */
136                public static Location location(List<Location> locations, Integer sequenceLength, String type) {
137                        type = (type == null) ? "join" : type;
138                        sequenceLength = (sequenceLength == null) ? -1 : sequenceLength;
139
140
141
142                        return null;
143                }
144
145                /**
146                 * Returns a location object which unlike the location constructors
147                 * allows you to input reverse coordinates and will convert
148                 * these into the right location on the positive strand.
149                 */
150                public static Location location(int start, int end, Strand strand, int length) {
151                        int min = Math.min(start, end);
152                        //if this is true then we have a coord on the +ve strand even though Strand could be negative
153                        boolean isReverse = (min != start);
154                        if (isReverse) {
155                                return new SimpleLocation(
156                                                new SimplePoint(start).reverse(length),
157                                                new SimplePoint(end).reverse(length),
158                                                strand);
159                        }
160                        return new SimpleLocation(start, end, strand);
161                }
162
163                /**
164                 * Converts a location which defines the outer bounds of a circular
165                 * location and splits it into the required portions. Unlike any
166                 * other location builder this allows you to express your input
167                 * location on the reverse strand
168                 *
169                 * @param location The location which currently expresses the outer
170                 * bounds of a circular location.
171                 * @param length The length of the circular genomic unit
172                 * @return The circular location; can optionally return a normal non
173                 * circular location if the one you give is within the bounds of
174                 * the length
175                 */
176                public static Location circularLocation(int start, int end, Strand strand, int length) {
177
178                        int min = Math.min(start, end);
179                        int max = Math.max(start, end);
180                        //Tells us we're dealing with something that's not _right_
181                        boolean isReverse = (min != start);
182
183                        if (min > length) {
184                                throw new IllegalArgumentException("Cannot process a "
185                                                + "location whose lowest coordinate is less than "
186                                                + "the given length " + length);
187                        }
188
189                        //If max positon was less than length the return a normal location
190                        if (max <= length) {
191                                return location(start, end, strand, length);
192                        }
193
194                        //Fine for forward coords (i..e start < end)
195                        int modStart = modulateCircularIndex(start, length);
196                        int modEnd = modulateCircularIndex(end, length);
197                        int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
198
199                        if (isReverse) {
200                                int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
201                                int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
202                                modStart = reversedModStart;
203                                modEnd = reversedModEnd;
204                                start = reversedModStart;
205                                //+1 to number of passes to skip the run encoded by the start
206                                end = (length * (numberOfPasses + 1)) + modEnd;
207                        }
208
209                        List<Location> locations = new ArrayList<Location>();
210                        locations.add(new SimpleLocation(modStart, length, strand));
211                        for (int i = 0; i < numberOfPasses; i++) {
212                                locations.add(new SimpleLocation(1, length, strand));
213                        }
214                        locations.add(new SimpleLocation(1, modEnd, strand));
215                        return new SimpleLocation(new SimplePoint(start),
216                                        new SimplePoint(end), strand, true, false, locations);
217                }
218
219                private static interface LocationPredicate {
220
221                        boolean accept(Location previous, Location current);
222                }
223
224                /**
225                 * Scans through a list of locations to find the Location with the
226                 * lowest start
227                 */
228                public static Location getMin(List<Location> locations) {
229                        return scanLocations(locations, new LocationPredicate() {
230
231                                @Override
232                                public boolean accept(Location previous, Location current) {
233                                        int res = current.getStart().compareTo(previous.getStart());
234                                        return res < 0;
235                                }
236                        });
237                }
238
239                /**
240                 * Scans through a list of locations to find the Location with the
241                 * highest end
242                 */
243                public static Location getMax(List<Location> locations) {
244                        return scanLocations(locations, new LocationPredicate() {
245
246                                @Override
247                                public boolean accept(Location previous, Location current) {
248                                        int res = current.getEnd().compareTo(previous.getEnd());
249                                        return res > 0;
250                                }
251                        });
252                }
253
254                /**
255                 * Used for scanning through a list of locations; assumes the
256                 * locations given will have at least one value otherwise
257                 * we will get a null pointer
258                 */
259                private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
260                        Location location = null;
261                        for (Location l : locations) {
262                                if (location == null) {
263                                        location = l;
264                                } else {
265                                        if (predicate.accept(location, l)) {
266                                                location = l;
267                                        }
268                                }
269                        }
270                        return location;
271                }
272
273                /**
274                 * Takes a point on a circular location and moves it left until it falls
275                 * at the earliest possible point that represents the same base.
276                 *
277                 * @param index Index of the position to work with
278                 * @param seqLength Length of the Sequence
279                 * @return The shifted point
280                 */
281                public static int modulateCircularIndex(int index, int seqLength) {
282                        // Dummy case
283                        if (seqLength == 0) {
284                                return index;
285                        }
286                        // Modulate
287                        while (index > seqLength) {
288                                index -= seqLength;
289                        }
290                        return index;
291                }
292
293                /**
294                 * Works in a similar way to modulateCircularLocation but returns
295                 * the number of complete passes over a Sequence length a circular
296                 * location makes i.e. if we have a sequence of length 10 and the
297                 * location 3..52 we make 4 complete passes through the genome to
298                 * go from position 3 to position 52.
299                 */
300                public static int completeCircularPasses(int index, int seqLength) {
301                        int count = 0;
302                        while (index > seqLength) {
303                                count++;
304                                index -= seqLength;
305                        }
306                        return count - 1;
307                }
308        }
309}