001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022package org.biojava.nbio.core.sequence.location.template; 023 024import org.biojava.nbio.core.sequence.Strand; 025import org.biojava.nbio.core.sequence.location.SimpleLocation; 026import org.biojava.nbio.core.sequence.location.SimplePoint; 027import org.biojava.nbio.core.sequence.template.Accessioned; 028import org.biojava.nbio.core.sequence.template.Compound; 029import org.biojava.nbio.core.sequence.template.Sequence; 030 031import java.util.ArrayList; 032import java.util.List; 033 034/** 035 * Sets of integers used to represent the location of features on sequence. A 036 * location can be a single set of bounds or composed of multiple 037 * sub-locations. Each sub-location is a Location and therefore subject to the 038 * same rules. 039 * 040 * @author ayates 041 */ 042public interface Location extends Iterable<Location>, Accessioned { 043 044 /** 045 * Basic location which is set to the minimum and maximum bounds of 046 * {@link Integer}. {@link Strand} is set to {@link Strand#UNDEFINED}. 047 */ 048 public static final Location EMPTY = 049 new SimpleLocation(Integer.MIN_VALUE, Integer.MAX_VALUE, Strand.UNDEFINED); 050 051 /** 052 * Start of the location 053 */ 054 Point getStart(); 055 056 /** 057 * End of the location 058 */ 059 Point getEnd(); 060 061 /** 062 * Returns the length of the outer bounds of this location 063 */ 064 int getLength(); 065 066 /** 067 * Strand which the location is located on 068 */ 069 Strand getStrand(); 070 071 /** 072 * Gives access to the sub locations for this location. However this does 073 * not return sub-locations of sub-locations. For that functionality use 074 * {@link #getAllSubLocations()}. 075 * 076 * @return A list of a single level of sub-locations 077 */ 078 List<Location> getSubLocations(); 079 080 /** 081 * An extension to {@link #getSubLocations()} which returns sub-locations 082 * of sub-locations; this will continue until it runs out of those locations. 083 * 084 * @return List of all sub locations including sub-locations of sub locations 085 */ 086 List<Location> getRelevantSubLocations(); 087 088 /** 089 * Returns true if the location is considered to be complex meaning 090 * the location is actually composed of sub-locations. 091 */ 092 boolean isComplex(); 093 094 /** 095 * Indicates if this location is circular. 096 */ 097 boolean isCircular(); 098 099 /** 100 * Returns true if the position is meant to represent a point between 101 * two points such as 78^79. Only valid if start and stop are next to 102 * each other. 103 */ 104 boolean isBetweenCompounds(); 105 106 /** 107 * Will return a SequenceReader object which represents the outer bounds 108 * of this Location 109 * 110 * @param <C> The type of compound to use 111 * @param sequence The sequence object to work with 112 * @return The sequence 113 */ 114 <C extends Compound> Sequence<C> getSubSequence(Sequence<C> sequence); 115 116 /** 117 * Will return a SequenceReader object which offers a view of all resolved 118 * locations i.e. those locations which are not complex and define the 119 * true Sequence represented 120 * 121 * @param <C> The type of compound to use 122 * @param sequence The sequence object to work with 123 * @return The full assembled sequence 124 */ 125 <C extends Compound> Sequence<C> getRelevantSubSequence(Sequence<C> sequence); 126 127 /** 128 * Helper methods for use with the Location classes. Taking its 129 * inspiration from the RichSequence.Tools class from the old BioJava 130 */ 131 public static class Tools { 132 133 /** 134 * Used for building a location from a series of sub-locations 135 */ 136 public static Location location(List<Location> locations, Integer sequenceLength, String type) { 137 type = (type == null) ? "join" : type; 138 sequenceLength = (sequenceLength == null) ? -1 : sequenceLength; 139 140 141 142 return null; 143 } 144 145 /** 146 * Returns a location object which unlike the location constructors 147 * allows you to input reverse coordinates and will convert 148 * these into the right location on the positive strand. 149 */ 150 public static Location location(int start, int end, Strand strand, int length) { 151 int min = Math.min(start, end); 152 //if this is true then we have a coord on the +ve strand even though Strand could be negative 153 boolean isReverse = (min != start); 154 if (isReverse) { 155 return new SimpleLocation( 156 new SimplePoint(start).reverse(length), 157 new SimplePoint(end).reverse(length), 158 strand); 159 } 160 return new SimpleLocation(start, end, strand); 161 } 162 163 /** 164 * Converts a location which defines the outer bounds of a circular 165 * location and splits it into the required portions. Unlike any 166 * other location builder this allows you to express your input 167 * location on the reverse strand 168 * 169 * @param location The location which currently expresses the outer 170 * bounds of a circular location. 171 * @param length The length of the circular genomic unit 172 * @return The circular location; can optionally return a normal non 173 * circular location if the one you give is within the bounds of 174 * the length 175 */ 176 public static Location circularLocation(int start, int end, Strand strand, int length) { 177 178 int min = Math.min(start, end); 179 int max = Math.max(start, end); 180 //Tells us we're dealing with something that's not _right_ 181 boolean isReverse = (min != start); 182 183 if (min > length) { 184 throw new IllegalArgumentException("Cannot process a " 185 + "location whose lowest coordinate is less than " 186 + "the given length " + length); 187 } 188 189 //If max positon was less than length the return a normal location 190 if (max <= length) { 191 return location(start, end, strand, length); 192 } 193 194 //Fine for forward coords (i..e start < end) 195 int modStart = modulateCircularIndex(start, length); 196 int modEnd = modulateCircularIndex(end, length); 197 int numberOfPasses = completeCircularPasses(Math.max(start, end), length); 198 199 if (isReverse) { 200 int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition(); 201 int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition(); 202 modStart = reversedModStart; 203 modEnd = reversedModEnd; 204 start = reversedModStart; 205 //+1 to number of passes to skip the run encoded by the start 206 end = (length * (numberOfPasses + 1)) + modEnd; 207 } 208 209 List<Location> locations = new ArrayList<Location>(); 210 locations.add(new SimpleLocation(modStart, length, strand)); 211 for (int i = 0; i < numberOfPasses; i++) { 212 locations.add(new SimpleLocation(1, length, strand)); 213 } 214 locations.add(new SimpleLocation(1, modEnd, strand)); 215 return new SimpleLocation(new SimplePoint(start), 216 new SimplePoint(end), strand, true, false, locations); 217 } 218 219 private static interface LocationPredicate { 220 221 boolean accept(Location previous, Location current); 222 } 223 224 /** 225 * Scans through a list of locations to find the Location with the 226 * lowest start 227 */ 228 public static Location getMin(List<Location> locations) { 229 return scanLocations(locations, new LocationPredicate() { 230 231 @Override 232 public boolean accept(Location previous, Location current) { 233 int res = current.getStart().compareTo(previous.getStart()); 234 return res < 0; 235 } 236 }); 237 } 238 239 /** 240 * Scans through a list of locations to find the Location with the 241 * highest end 242 */ 243 public static Location getMax(List<Location> locations) { 244 return scanLocations(locations, new LocationPredicate() { 245 246 @Override 247 public boolean accept(Location previous, Location current) { 248 int res = current.getEnd().compareTo(previous.getEnd()); 249 return res > 0; 250 } 251 }); 252 } 253 254 /** 255 * Used for scanning through a list of locations; assumes the 256 * locations given will have at least one value otherwise 257 * we will get a null pointer 258 */ 259 private static Location scanLocations(List<Location> locations, LocationPredicate predicate) { 260 Location location = null; 261 for (Location l : locations) { 262 if (location == null) { 263 location = l; 264 } else { 265 if (predicate.accept(location, l)) { 266 location = l; 267 } 268 } 269 } 270 return location; 271 } 272 273 /** 274 * Takes a point on a circular location and moves it left until it falls 275 * at the earliest possible point that represents the same base. 276 * 277 * @param index Index of the position to work with 278 * @param seqLength Length of the Sequence 279 * @return The shifted point 280 */ 281 public static int modulateCircularIndex(int index, int seqLength) { 282 // Dummy case 283 if (seqLength == 0) { 284 return index; 285 } 286 // Modulate 287 while (index > seqLength) { 288 index -= seqLength; 289 } 290 return index; 291 } 292 293 /** 294 * Works in a similar way to modulateCircularLocation but returns 295 * the number of complete passes over a Sequence length a circular 296 * location makes i.e. if we have a sequence of length 10 and the 297 * location 3..52 we make 4 complete passes through the genome to 298 * go from position 3 to position 52. 299 */ 300 public static int completeCircularPasses(int index, int seqLength) { 301 int count = 0; 302 while (index > seqLength) { 303 count++; 304 index -= seqLength; 305 } 306 return count - 1; 307 } 308 } 309}