001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.location;
023
024import org.biojava.nbio.core.exceptions.ParserException;
025import org.biojava.nbio.core.sequence.AccessionID;
026import org.biojava.nbio.core.sequence.Strand;
027import org.biojava.nbio.core.sequence.location.template.Location;
028import org.biojava.nbio.core.sequence.location.template.Point;
029
030import java.util.ArrayList;
031import java.util.HashSet;
032import java.util.List;
033import java.util.Set;
034
035/**
 * Helper methods for use with the Location classes. It takes its
 * inspiration from the RichSequence.Tools class from the old BioJava.
038 */
039public class LocationHelper {
040
041        /**
042         * Used as a thin wrapper to the {@link #location(java.util.List, java.lang.String) }
043         * method to bring the given location list together as a join (the default
044         * type)
045         */
046        public static Location location(List<Location> subLocations) {
047                return location(subLocations, "join");
048        }
049
050        /**
051         * Builds a location from a List of locations; this can be circular or
052         * linear joins. The code expects that these locations are in
053         * a sensible format.
054         *
055         * @param subLocations The list of locations to use to build the location.
056         * If given a list of size 1 we will return that location.
057         * @param type The type of join for this location; defaults to join
058         * @return
059         */
060        public static Location location(List<Location> subLocations, String type) {
061                if (subLocations.size() == 1) {
062                        return subLocations.get(0);
063                }
064
065                boolean circular = detectCicular(subLocations);
066                Strand strand = detectStrand(subLocations);
067                Point start = detectStart(subLocations);
068                Point end = detectEnd(subLocations, circular);
069                Location l;
070                if ("join".equals(type)) {
071                        l = new SimpleLocation(start, end, strand, circular, subLocations);
072                }
073                else if ("order".equals(type)) {
074                        l = new InsdcLocations.OrderLocation(start, end, strand, circular, subLocations);
075                }
076                else if ("one-of".equals(type)) {
077                        l = new InsdcLocations.OneOfLocation(subLocations);
078                }
079                else if ("group".equals(type)) {
080                        l = new InsdcLocations.GroupLocation(start, end, strand, circular, subLocations);
081                }
082                else if ("bond".equals(type)) {
083                        l = new InsdcLocations.BondLocation(subLocations);
084                }
085                else {
086                        throw new ParserException("Unknown join type " + type);
087                }
088
089                return l;
090        }
091
092        /**
093         * Returns a location object which unlike the location constructors
094         * allows you to input reverse coordinates and will convert
095         * these into the right location on the positive strand.
096         */
097        public static Location location(int start, int end, Strand strand, int length) {
098                int min = Math.min(start, end);
099                //if this is true then we have a coord on the +ve strand even though Strand could be negative
100                boolean isReverse = (min != start);
101                if (isReverse) {
102                        return new SimpleLocation(
103                                        new SimplePoint(start).reverse(length),
104                                        new SimplePoint(end).reverse(length),
105                                        strand);
106                }
107                return new SimpleLocation(start, end, strand);
108        }
109
110        /**
111         * Converts a location which defines the outer bounds of a circular
112         * location and splits it into the required portions. Unlike any
113         * other location builder this allows you to express your input
114         * location on the reverse strand
115         *
116         * @param start The location which currently expresses the outer
117         * bounds of a circular location.
118         * @param length The length of the circular genomic unit
119         * @return The circular location; can optionally return a normal non
120         * circular location if the one you give is within the bounds of
121         * the length
122         */
123        public static Location circularLocation(int start, int end, Strand strand, int length) {
124
125                int min = Math.min(start, end);
126                int max = Math.max(start, end);
127                //Tells us we're dealing with something that's not _right_
128                boolean isReverse = (min != start);
129
130                if (min > length) {
131                        throw new IllegalArgumentException("Cannot process a "
132                                        + "location whose lowest coordinate is less than "
133                                        + "the given length " + length);
134                }
135
136                //If max positon was less than length the return a normal location
137                if (max <= length) {
138                        return location(start, end, strand, length);
139                }
140
141                //Fine for forward coords (i..e start < end)
142                int modStart = modulateCircularIndex(start, length);
143                int modEnd = modulateCircularIndex(end, length);
144                int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
145
146                if (isReverse) {
147                        int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
148                        int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
149                        modStart = reversedModStart;
150                        modEnd = reversedModEnd;
151                        start = reversedModStart;
152                        //+1 to number of passes to skip the run encoded by the start
153                        end = (length * (numberOfPasses + 1)) + modEnd;
154                }
155
156                List<Location> locations = new ArrayList<>();
157                locations.add(new SimpleLocation(modStart, length, strand));
158                for (int i = 0; i < numberOfPasses; i++) {
159                        locations.add(new SimpleLocation(1, length, strand));
160                }
161                locations.add(new SimpleLocation(1, modEnd, strand));
162                return new SimpleLocation(new SimplePoint(start),
163                                new SimplePoint(end), strand, true, false, locations);
164        }
165
166        private static interface LocationPredicate {
167                boolean accept(Location previous, Location current);
168        }
169
170        /**
171         * Scans through a list of locations to find the Location with the
172         * lowest start
173         */
174        public static Location getMin(List<Location> locations) {
175                return scanLocations(locations, new LocationPredicate() {
176                        @Override
177                        public boolean accept(Location previous, Location current) {
178                                int res = current.getStart().compareTo(previous.getStart());
179                                return res < 0;
180                        }
181                });
182        }
183
184        /**
185         * Scans through a list of locations to find the Location with the
186         * highest end
187         */
188        public static Location getMax(List<Location> locations) {
189                return scanLocations(locations, new LocationPredicate() {
190                        @Override
191                        public boolean accept(Location previous, Location current) {
192                                int res = current.getEnd().compareTo(previous.getEnd());
193                                return res > 0;
194                        }
195                });
196        }
197
198        /**
199         * Used for scanning through a list of locations; assumes the
200         * locations given will have at least one value otherwise
201         * we will get a null pointer
202         */
203        private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
204                Location location = null;
205                for (Location l : locations) {
206                        if (location == null) {
207                                location = l;
208                        }
209                        else {
210                                if (predicate.accept(location, l)) {
211                                        location = l;
212                                }
213                        }
214                }
215                return location;
216        }
217
218        /**
219         * Takes a point on a circular location and moves it left until it falls
220         * at the earliest possible point that represents the same base.
221         *
222         * @param index Index of the position to work with
223         * @param seqLength Length of the Sequence
224         * @return The shifted point
225         */
226        public static int modulateCircularIndex(int index, int seqLength) {
227                // Dummy case
228                if (seqLength == 0) {
229                        return index;
230                }
231                // Modulate
232                while (index > seqLength) {
233                        index -= seqLength;
234                }
235                return index;
236        }
237
238        /**
239         * Works in a similar way to modulateCircularLocation but returns
240         * the number of complete passes over a Sequence length a circular
241         * location makes i.e. if we have a sequence of length 10 and the
242         * location 3..52 we make 4 complete passes through the genome to
243         * go from position 3 to position 52.
244         */
245        public static int completeCircularPasses(int index, int seqLength) {
246                int count = 0;
247                while (index > seqLength) {
248                        count++;
249                        index -= seqLength;
250                }
251                return count - 1;
252        }
253
254        /**
255         * Loops through the given list of locations and returns true if it looks
256         * like they represent a circular location. Detection cannot happen if
257         * we do not have consistent accessions
258         */
259        public static boolean detectCicular(List<Location> subLocations) {
260                boolean isCircular = false;
261                if(! consistentAccessions(subLocations))
262                        return isCircular;
263
264                int lastMax = 0;
265                for (Location sub : subLocations) {
266                        if (sub.getEnd().getPosition() > lastMax) {
267                                lastMax = sub.getEnd().getPosition();
268                        }
269                        else {
270                                isCircular = true;
271                                break;
272                        }
273                }
274                return isCircular;
275        }
276
277        /**
278         * Scans a list of locations and returns true if all the given locations
279         * are linked to the same sequence. A list of null accessioned locations
280         * is the same as a list where the accession is the same
281         *
282         * @param subLocations The locations to scan
283         * @return Returns a boolean indicating if this is consistently accessioned
284         */
285        public static boolean consistentAccessions(List<Location> subLocations) {
286                Set<AccessionID> set = new HashSet<>();
287                for(Location sub: subLocations) {
288                        set.add(sub.getAccession());
289                }
290                return set.size() == 1;
291        }
292
293        /**
294         * Loops through the given list of locations and returns the consensus
295         * Strand class. If the class switches then we will return an undefined
296         * strand
297         */
298        public static Strand detectStrand(List<Location> subLocations) {
299                Strand strand = subLocations.get(0).getStrand();
300                for (Location sub : subLocations) {
301                        if (strand != sub.getStrand()) {
302                                strand = Strand.UNDEFINED;
303                                break;
304                        }
305                }
306                return strand;
307        }
308
309        /**
310         * Assumes that the first element is the start &amp; clones it
311         */
312        public static Point detectStart(List<Location> subLocations) {
313                return subLocations.get(0).getStart().clonePoint();
314        }
315
316        /**
317         * This will attempt to find what the last point is and returns that
318         * position. If the location is circular this will return the total length
319         * of the location and does not mean the maximum point on the Sequence
320         * we may find the locations on
321         */
322        public static Point detectEnd(List<Location> subLocations, boolean isCircular) {
323                int end = 0;
324                Point lastPoint = null;
325                if(isCircular) {
326                        for (Location sub : subLocations) {
327                                lastPoint = sub.getEnd();
328                                end += lastPoint.getPosition();
329                        }
330                }
331                else {
332                        lastPoint = subLocations.get(subLocations.size()-1).getEnd();
333                        end = lastPoint.getPosition();
334                }
335                return new SimplePoint(end, lastPoint.isUnknown(), lastPoint.isUncertain());
336        }
337}