/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 */
package org.biojava.nbio.core.sequence.location;

import org.biojava.nbio.core.exceptions.ParserException;
import org.biojava.nbio.core.sequence.AccessionID;
import org.biojava.nbio.core.sequence.Strand;
import org.biojava.nbio.core.sequence.location.template.Location;
import org.biojava.nbio.core.sequence.location.template.Point;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Helper methods for use with the Location classes. Taking its
 * inspiration from the RichSequence.Tools class from the old BioJava.
 */
public class LocationHelper {

    /**
     * Used as a thin wrapper to the {@link #location(java.util.List, java.lang.String)}
     * method to bring the given location list together as a join (the default
     * type).
     *
     * @param subLocations The list of locations to join; must not be empty
     * @return The single element of the list if it has size 1, otherwise a
     * location built from all the given sub-locations
     */
    public static Location location(List<Location> subLocations) {
        return location(subLocations, "join");
    }

    /**
     * Builds a location from a List of locations; this can be circular or
     * linear joins. The code expects that these locations are in
     * a sensible format.
     *
     * @param subLocations The list of locations to use to build the location.
     * If given a list of size 1 we will return that location (in that case
     * the type is not inspected at all). The list must not be empty.
     * @param type The type of join for this location; one of {@code join},
     * {@code order}, {@code one-of}, {@code group} or {@code bond}
     * @return The sole sub-location for a list of size 1, otherwise a new
     * compound location of the requested type
     * @throws ParserException if the type is not one of the recognised values
     */
    public static Location location(List<Location> subLocations, String type) {
        if (subLocations.size() == 1) {
            return subLocations.get(0);
        }

        boolean circular = detectCicular(subLocations);
        Strand strand = detectStrand(subLocations);
        Point start = detectStart(subLocations);
        Point end = detectEnd(subLocations, circular);
        Location l;
        if ("join".equals(type)) {
            l = new SimpleLocation(start, end, strand, circular, subLocations);
        }
        else if ("order".equals(type)) {
            l = new InsdcLocations.OrderLocation(start, end, strand, circular, subLocations);
        }
        else if ("one-of".equals(type)) {
            l = new InsdcLocations.OneOfLocation(subLocations);
        }
        else if ("group".equals(type)) {
            l = new InsdcLocations.GroupLocation(start, end, strand, circular, subLocations);
        }
        else if ("bond".equals(type)) {
            l = new InsdcLocations.BondLocation(subLocations);
        }
        else {
            throw new ParserException("Unknown join type " + type);
        }

        return l;
    }

    /**
     * Returns a location object which unlike the location constructors
     * allows you to input reverse coordinates and will convert
     * these into the right location on the positive strand.
     *
     * @param start The first coordinate; may be greater than {@code end}
     * @param end The second coordinate
     * @param strand The strand to assign to the resulting location
     * @param length The sequence length used to reverse the coordinates
     * when {@code start} is greater than {@code end}
     * @return The location for the given coordinates
     */
    public static Location location(int start, int end, Strand strand, int length) {
        int min = Math.min(start, end);
        //if this is true then we have a coord on the +ve strand even though Strand could be negative
        boolean isReverse = (min != start);
        if (isReverse) {
            return new SimpleLocation(
                    new SimplePoint(start).reverse(length),
                    new SimplePoint(end).reverse(length),
                    strand);
        }
        return new SimpleLocation(start, end, strand);
    }

    /**
     * Converts a location which defines the outer bounds of a circular
     * location and splits it into the required portions. Unlike any
     * other location builder this allows you to express your input
     * location on the reverse strand.
     *
     * @param start The first outer-bound coordinate; may be greater than
     * {@code end} to express reverse coordinates
     * @param end The second outer-bound coordinate
     * @param strand The strand assigned to every generated location
     * @param length The length of the circular genomic unit
     * @return The circular location; can optionally return a normal non
     * circular location if the one you give is within the bounds of
     * the length
     * @throws IllegalArgumentException if the lowest of the two coordinates
     * is greater than the given length
     */
    public static Location circularLocation(int start, int end, Strand strand, int length) {

        int min = Math.min(start, end);
        int max = Math.max(start, end);
        //Tells us we're dealing with something that's not _right_
        boolean isReverse = (min != start);

        if (min > length) {
            throw new IllegalArgumentException("Cannot process a "
                    + "location whose lowest coordinate is greater than "
                    + "the given length " + length);
        }

        //If the max position does not exceed the length then return a normal location
        if (max <= length) {
            return location(start, end, strand, length);
        }

        //Fine for forward coords (i.e. start < end)
        int modStart = modulateCircularIndex(start, length);
        int modEnd = modulateCircularIndex(end, length);
        int numberOfPasses = completeCircularPasses(max, length);

        //Outer bounds reported on the returned location
        int outerStart = start;
        int outerEnd = end;

        if (isReverse) {
            modStart = new SimplePoint(modStart).reverse(length).getPosition();
            modEnd = new SimplePoint(modEnd).reverse(length).getPosition();
            outerStart = modStart;
            //+1 to number of passes to skip the run encoded by the start
            outerEnd = (length * (numberOfPasses + 1)) + modEnd;
        }

        //First partial run, then every complete pass, then the final partial run
        List<Location> locations = new ArrayList<>();
        locations.add(new SimpleLocation(modStart, length, strand));
        for (int i = 0; i < numberOfPasses; i++) {
            locations.add(new SimpleLocation(1, length, strand));
        }
        locations.add(new SimpleLocation(1, modEnd, strand));
        // NOTE(review): the two boolean flags are presumably circular=true and
        // betweenCompounds=false -- confirm against the SimpleLocation constructor.
        return new SimpleLocation(new SimplePoint(outerStart),
                new SimplePoint(outerEnd), strand, true, false, locations);
    }

    /**
     * Decides whether the current location should replace the best
     * location found so far during a scan.
     */
    private interface LocationPredicate {
        boolean accept(Location previous, Location current);
    }

    /**
     * Scans through a list of locations to find the Location with the
     * lowest start.
     *
     * @param locations The locations to scan
     * @return The first location with the lowest start, or null if the
     * list is empty
     */
    public static Location getMin(List<Location> locations) {
        return scanLocations(locations, new LocationPredicate() {
            @Override
            public boolean accept(Location previous, Location current) {
                int res = current.getStart().compareTo(previous.getStart());
                return res < 0;
            }
        });
    }

    /**
     * Scans through a list of locations to find the Location with the
     * highest end.
     *
     * @param locations The locations to scan
     * @return The first location with the highest end, or null if the
     * list is empty
     */
    public static Location getMax(List<Location> locations) {
        return scanLocations(locations, new LocationPredicate() {
            @Override
            public boolean accept(Location previous, Location current) {
                int res = current.getEnd().compareTo(previous.getEnd());
                return res > 0;
            }
        });
    }

    /**
     * Used for scanning through a list of locations; keeps the first
     * element and replaces it whenever the predicate accepts a later
     * element over the current best. Returns null when the list is empty,
     * so callers dereferencing the result of an empty scan will get a
     * null pointer.
     */
    private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
        Location location = null;
        for (Location l : locations) {
            if (location == null) {
                location = l;
            }
            else {
                if (predicate.accept(location, l)) {
                    location = l;
                }
            }
        }
        return location;
    }

    /**
     * Takes a point on a circular location and moves it left until it falls
     * at the earliest possible point that represents the same base.
     *
     * @param index Index of the position to work with
     * @param seqLength Length of the Sequence
     * @return The shifted point; the index is returned unchanged when
     * seqLength is 0 or when the index does not exceed seqLength
     */
    public static int modulateCircularIndex(int index, int seqLength) {
        // Dummy case
        if (seqLength == 0) {
            return index;
        }
        // Modulate
        while (index > seqLength) {
            index -= seqLength;
        }
        return index;
    }

    /**
     * Works in a similar way to {@link #modulateCircularIndex(int, int)} but
     * returns the number of complete passes over a Sequence length a circular
     * location makes i.e. if we have a sequence of length 10 and the
     * location 3..52 we make 4 complete passes through the genome to
     * go from position 3 to position 52.
     *
     * @param index Index of the position to work with
     * @param seqLength Length of the Sequence
     * @return The number of complete passes; -1 when the index does not
     * exceed seqLength (no wrapping happens) or when seqLength is 0
     */
    public static int completeCircularPasses(int index, int seqLength) {
        // Dummy case, mirrors modulateCircularIndex: subtracting a zero length
        // never shrinks the index, so the loop below would never terminate.
        if (seqLength == 0) {
            return -1;
        }
        int count = 0;
        while (index > seqLength) {
            count++;
            index -= seqLength;
        }
        // The last subtraction lands in the final partial run, not a complete pass
        return count - 1;
    }

    /**
     * Loops through the given list of locations and returns true if it looks
     * like they represent a circular location. Detection cannot happen if
     * we do not have consistent accessions.
     *
     * @param subLocations The locations to inspect
     * @return true if the accessions are consistent and some location's end
     * position is not greater than the highest end position seen before it
     */
    public static boolean detectCicular(List<Location> subLocations) {
        boolean isCircular = false;
        if (!consistentAccessions(subLocations)) {
            return isCircular;
        }

        int lastMax = 0;
        for (Location sub : subLocations) {
            if (sub.getEnd().getPosition() > lastMax) {
                lastMax = sub.getEnd().getPosition();
            }
            else {
                // End positions stopped increasing, so we must have wrapped around
                isCircular = true;
                break;
            }
        }
        return isCircular;
    }

    /**
     * Scans a list of locations and returns true if all the given locations
     * are linked to the same sequence. A list of null accessioned locations
     * is the same as a list where the accession is the same.
     *
     * @param subLocations The locations to scan
     * @return Returns a boolean indicating if this is consistently accessioned;
     * an empty list yields false
     */
    public static boolean consistentAccessions(List<Location> subLocations) {
        Set<AccessionID> set = new HashSet<>();
        for (Location sub : subLocations) {
            set.add(sub.getAccession());
        }
        return set.size() == 1;
    }

    /**
     * Loops through the given list of locations and returns the consensus
     * Strand class. If the class switches then we will return an undefined
     * strand.
     *
     * @param subLocations The locations to inspect; must not be empty
     * @return The strand shared by every location or {@link Strand#UNDEFINED}
     * if they differ
     */
    public static Strand detectStrand(List<Location> subLocations) {
        Strand strand = subLocations.get(0).getStrand();
        for (Location sub : subLocations) {
            if (strand != sub.getStrand()) {
                strand = Strand.UNDEFINED;
                break;
            }
        }
        return strand;
    }

    /**
     * Assumes that the first element is the start and clones it.
     *
     * @param subLocations The locations to inspect; must not be empty
     * @return A clone of the start point of the first location
     */
    public static Point detectStart(List<Location> subLocations) {
        return subLocations.get(0).getStart().clonePoint();
    }

    /**
     * This will attempt to find what the last point is and returns that
     * position. If the location is circular this will return the total length
     * of the location and does not mean the maximum point on the Sequence
     * we may find the locations on.
     *
     * @param subLocations The locations to inspect; must not be empty
     * @param isCircular Whether the locations are to be treated as circular
     * @return A new point whose position is the sum of all end positions when
     * circular, otherwise the end position of the last location; the unknown
     * and uncertain flags are copied from the last location's end point
     */
    public static Point detectEnd(List<Location> subLocations, boolean isCircular) {
        int end = 0;
        Point lastPoint = null;
        if (isCircular) {
            for (Location sub : subLocations) {
                lastPoint = sub.getEnd();
                end += lastPoint.getPosition();
            }
        }
        else {
            lastPoint = subLocations.get(subLocations.size() - 1).getEnd();
            end = lastPoint.getPosition();
        }
        return new SimplePoint(end, lastPoint.isUnknown(), lastPoint.isUncertain());
    }
}