001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.parsers.gff; 022 023import org.biojava.nbio.core.sequence.DNASequence; 024 025import java.io.Serializable; 026import java.util.*; 027import java.util.Map.Entry; 028 029 030 031/** 032 * A list of FeatureI objects implemented using a Java ArrayList; corresponds to a GFF file. 033 * This class is implemented entirely using FeatureI objects, so everything here will work 034 * correctly if you choose to implement your own feature class -- there are no dependencies 035 * on JavaGene's native Feature class. 036 * 037 * 038 * @author Hanno Hinsch, Carmelo Foti 039 */ 040@SuppressWarnings("serial") 041public class FeatureList extends ArrayList<FeatureI> { 042 043 Map<String, Map<String,List<FeatureI>>> featindex = new HashMap<>(); 044 Location mLocation; //genomic location (union of feature locations) 045 046 /** 047 * Construct an empty list. 048 */ 049 public FeatureList() { 050 mLocation = null; 051 } 052 053 /** 054 * Construct a new list containing the same features 055 * as the specified list. 056 * 057 * @param features An existing list or collection of FeatureI objects. 058 */ 059 public FeatureList(Collection<FeatureI> features) { 060 this(); 061 this.add(features); 062 063 mLocation = null; 064 } 065 066 /** 067 * Add specified feature to the end of the list. Updates the bounding location of the 068 * feature list, if needed. 069 * 070 * @param feature The FeatureI object to add. 071 * @return True if the feature was added. 072 */ 073 @Override 074 public boolean add(FeatureI feature) { 075 if (mLocation == null) { 076 mLocation = feature.location().plus(); 077 } else if (null != feature.location()) { 078 mLocation = mLocation.union(feature.location().plus()); 079 } 080 for (Entry<String, String> entry : feature.getAttributes().entrySet()){ 081 if (featindex.containsKey(entry.getKey())){ 082 Map<String,List<FeatureI>> feat = featindex.get(entry.getKey()); 083 if (feat==null){ 084 feat= new HashMap<>(); 085 } 086 List<FeatureI> features = feat.get(entry.getValue()); 087 if (features==null){ 088 features = new ArrayList<>(); 089 } 090 features.add(feature); 091 feat.put(entry.getValue(), features); 092 featindex.put(entry.getKey(), feat); 093 //featindex.put(key, value) 094 } 095 } 096 097 return super.add(feature); 098 } 099 100 /** 101 * Add all features in the specified list or collection to this list. 102 * 103 * @param list The collection of FeatureI objects. 104 */ 105 public void add(Collection<FeatureI> list) { 106 for (FeatureI f : list) { 107 add(f); 108 } 109 } 110 111 /** 112 * The union of all locations of all features in this list, mapped to the positive strand. 113 * If an added feature is on the negative strand, its positive strand image is added 114 * to the union. 115 * The bounding location is not updated when a feature is removed from the list, so 116 * it is not guaranteed to be the minimal bounding location. 117 * 118 * @return A location that is the union of all feature locations in the list. 119 */ 120 public Location bounds() { 121 return mLocation; 122 } 123 124 /** 125 * Check size of gaps between successive features in list. The features in 126 * the list are assumed to be appropriately ordered. 127 * 128 * @param gapLength The minimum gap length to consider. Use a gapLength 129 * of 0 to check if features are contiguous. 130 * @return True if list has any gaps equal to or greater than gapLength. 131 */ 132 public boolean hasGaps(int gapLength) { 133 Location last = null; 134 for (FeatureI f : this) { 135 if (last != null && gapLength <= f.location().distance(last)) { 136 return true; 137 } else { 138 last = f.location(); 139 } 140 } 141 142 return false; 143 } 144 145 146 147 148 /** 149 * Concatenate successive portions of the specified sequence 150 * using the feature locations in the list. The list is assumed to be appropriately 151 * ordered. 152 * 153 * @param sequence The source sequence from which portions should be selected. 154 * @return The spliced data. 155 * @throws IllegalStateException Out of order or overlapping FeatureI locations detected. 156 * 157 */ 158 public String splice(DNASequence sequence) { 159 StringBuilder subData = new StringBuilder(); 160 Location last = null; 161 162 for (FeatureI f : this) { 163 Location loc = f.location(); 164 165 if (last == null || loc.startsAfter(last)) { 166 subData.append(sequence.getSubSequence(loc.start(), loc.end()).toString()); 167 last = loc; 168 } else { 169 throw new IllegalStateException("Splice: Feature locations should not overlap."); 170 } 171 172 } 173 174 return subData.toString(); 175 } 176 177 /** 178 * Create a collection of all unique group ids in the list, as defined 179 * by the group() method of the features. For example, if the 180 * features are from a GFF1 file, then each group id identifies a particular gene, 181 * and this method returns a collection of all gene ids. 182 * 183 * @return A collection (suitable for iteration using Java's "for" loop) of all the 184 * group ids found in this list. The order of the values is undefined; it will not match 185 * the order of features in the list. 186 */ 187 public Collection<String> groupValues() { 188 Set<String> set = new HashSet<>(); 189 for (FeatureI f : this) { 190 //enter in a set -- removes duplicates 191 set.add(f.group()); 192 } 193 194 return set; 195 } 196 197 /** 198 * Create a collection of the unique values for the specified key. 199 * Example: For GTF files, using the "gene_id" key will give the names of all 200 * the genes in this list. 201 * 202 * @return A collection (suitable for iteration using Java's "for" loop) of all the 203 * values found for this key. The order of the values is undefined; it will not match 204 * the order of features in the list. 205 */ 206 public Collection<String> attributeValues(String key) { 207 if (featindex.containsKey(key)){ 208 Map<String, List<FeatureI>> map = featindex.get(key); 209 Collection<String> result = map.keySet(); 210 if (result == null) result = new HashSet<>(); 211 return Collections.unmodifiableCollection(result); 212 } 213 LinkedHashMap<String, String> hash = new LinkedHashMap<>(); 214 for (FeatureI f : this) { 215 //enter as a key -- removes duplicates 216 hash.put(f.getAttribute(key), null); 217 } 218 219 return Collections.unmodifiableCollection(hash.keySet()); 220 } 221 222 /** 223 * Create a list of all features that have the specified group id, as defined by 224 * the group() method of the features. 225 * 226 * @param groupid The group to match. 227 * @return A list of features having the specified group id. 228 */ 229 public FeatureList selectByGroup(String groupid) { 230 FeatureList list = new FeatureList(); 231 for (FeatureI f : this) { 232 if (f.group().equals(groupid)) { 233 list.add(f); 234 } 235 } 236 237 return list; 238 } 239 240 /** 241 * Create a list of all features that are of the specified type, as defined by 242 * the type() method of the features. 243 * This might be, for example, "exon" or "CDS". 244 * 245 * @param type The type to match. 246 * @return A list of features of the specified type. 247 */ 248 public FeatureList selectByType(String type) { 249 FeatureList list = new FeatureList(); 250 for (FeatureI f : this) { 251 if (f.type().equals(type)) { 252 list.add(f); 253 } 254 } 255 256 return list; 257 } 258 259 /** 260 * Create a list of all features that include the specified attribute key/value pair. 261 * This method now properly supports adding the index before or after adding the features. 262 * Adding features, then then index, then more features is still not supported. 263 * 264 * @param key The key to consider. 265 * @param value The value to consider. 266 * @return A list of features that include the key/value pair. 267 */ 268 public FeatureList selectByAttribute(String key, String value) { 269 if (featindex.containsKey(key)){ 270 Map<String,List<FeatureI>> featuresmap = featindex.get(key); 271 if (featuresmap==null) return new FeatureList(); 272 List<FeatureI> list = featuresmap.get(value); 273 if (list == null){ 274 return new FeatureList(); 275 } 276 return new FeatureList(list); 277 } 278 FeatureList list = new FeatureList(); 279 for (FeatureI f : this) { 280 if (f.hasAttribute(key, value)) { 281 list.add(f); 282 } 283 } 284 return list; 285 } 286 /** 287 * Create a list of all features that include the specified attribute key. 288 * 289 * @param key The key to consider. 290 * @return A list of features that include the key. 291 */ 292 public FeatureList selectByAttribute(String key) { 293 FeatureList list = new FeatureList(); 294 if (featindex.containsKey(key)){ 295 Map<String, List<FeatureI>> featsmap =featindex.get(key); 296 if(null != featsmap) { 297 for (List<FeatureI> feats: featsmap.values()){ 298 list.addAll(Collections.unmodifiableCollection(feats)); 299 } 300 return list; 301 } 302 } 303 304 for (FeatureI f : this) { 305 if (f.hasAttribute(key)) { 306 list.add(f); 307 } 308 } 309 return list; 310 } 311 312 /** 313 * Create a list of all features that include the specified key/value pair in their userMap(). 314 * 315 * @param key The key to consider. 316 * @param value The value to consider. 317 * @return A list of features that include the key/value pair. 318 */ 319 public FeatureList selectByUserData(String key, Object value) { 320 FeatureList list = new FeatureList(); 321 for (FeatureI f : this) { 322 Object o = f.userData().get(key); 323 if (o != null && o.equals(value)) { 324 list.add(f); 325 } 326 } 327 return list; 328 } 329 /** 330 * Create a list of all features that include the specified key in their userMap(). 331 * 332 * @param key The key to consider. 333 * @return A list of features that include the key. 334 */ 335 public FeatureList selectByUserData(String key) { 336 FeatureList list = new FeatureList(); 337 for (FeatureI f : this) { 338 if (f.userData().containsKey(key)) { 339 list.add(f); 340 } 341 } 342 return list; 343 } 344 345 /** 346 * Create a list of all features that overlap the specified location on the specified 347 * sequence. 348 * 349 * @param seqname The sequence name. Only features with this sequence name will be checked for overlap. 350 * @param location The location to check. 351 * @param useBothStrands If true, locations are mapped to their positive strand image 352 * before being checked for overlap. If false, only features whose locations are 353 * on the same strand as the specified location will be considered for inclusion. 354 * @return The new list of features that overlap the location. 355 */ 356 public FeatureList selectOverlapping(String seqname, Location location, boolean useBothStrands) 357 throws Exception { 358 FeatureList list = new FeatureList(); 359 360 for (FeatureI feature : this) { 361 boolean overlaps = false; 362 if (feature.seqname().equals(seqname)) { 363 if (location.isSameStrand(feature.location())) { 364 overlaps = feature.location().overlaps(location); 365 } else if (useBothStrands) { 366 overlaps = feature.location().overlaps(location.opposite()); 367 } 368 } 369 if (overlaps) { 370 list.add(feature); 371 } 372 } 373 return list; 374 } 375 376 /** 377 * Create a list of all features that do not overlap the specified location on the specified sequence. 378 * 379 * @param seqname The sequence name. Only features with this sequence name will be checked for overlap. 380 * @param location The location to check. 381 * @param useBothStrands If true, locations are mapped to their positive strand image 382 * before being checked for overlap. If false, all features whose locations are 383 * on the opposite strand from the specified location will be considered non-overlapping. 384 * @return The new list of features that do not overlap the location. 385 */ 386 public FeatureList omitOverlapping(String seqname, Location location, boolean useBothStrands) { 387 FeatureList list = new FeatureList(); 388 389 for (FeatureI feature : this) { 390 boolean overlaps = false; 391 if (feature.seqname().equals(seqname)) { 392 if (location.isSameStrand(feature.location())) { 393 overlaps = feature.location().overlaps(location); 394 } else if (useBothStrands) { 395 overlaps = feature.location().overlaps(location.opposite()); 396 } 397 } 398 399 if (!overlaps) { 400 list.add(feature); 401 } 402 } 403 404 return list; 405 } 406 407 /** 408 * Check if any feature in list has the specified attribute key. 409 * 410 * @param key The attribute key to consider. 411 * @return True if at least one feature has the attribute key. 412 */ 413 public boolean hasAttribute(String key) { 414 if (featindex.containsKey(key)){ 415 Map<String, List<FeatureI>> mappa = featindex.get(key); 416 if (mappa!= null && mappa.size()>0)return true; 417 return false; 418 } 419 for (FeatureI f : this) { 420 if (f.hasAttribute(key)) { 421 return true; 422 } 423 } 424 425 return false; 426 } 427 428 /** 429 * Check if any feature in list has the specified attribute key/value pair. 430 * 431 * @param key The attribute key to consider. 432 * @param value The attribute value to consider. 433 * @return True if at least one feature has the key/value pair. 434 */ 435 public boolean hasAttribute(String key, String value) { 436 if (featindex.containsKey(key)){ 437 Map<String, List<FeatureI>> mappa = featindex.get(key); 438 if (mappa == null) return false; 439 if (mappa.containsKey(value)) return true; 440 return false; 441 } 442 443 for (FeatureI f : this) { 444 if (f.hasAttribute(key, value)) { 445 return true; 446 } 447 } 448 449 return false; 450 } 451 452 /** 453 * Return a string representation of all features in this list. 454 * 455 * @return A string. 456 */ 457 @Override 458 public String toString() { 459 StringBuilder s = new StringBuilder("FeatureList: >>\n"); 460 for (FeatureI f : this) { 461 s.append( f.seqname() + ":" + f.toString() + "\n"); 462 } 463 464 s.append("\n<<\n"); 465 return s.toString(); 466 } 467 468 /** 469 * used by sort routine 470 */ 471 private class FeatureComparator implements Comparator<FeatureI>, Serializable { 472 private static final long serialVersionUID = 1; 473 474 @Override 475 public int compare(FeatureI a, FeatureI b) { 476 if (a.seqname().equals(b.seqname()) && a.location().isSameStrand(b.location())) { 477 return a.location().start() - b.location().start(); //sort on start 478 } else { 479 throw new IndexOutOfBoundsException("Cannot compare/sort features whose locations are on opposite strands or with different seqname().\r\n" + a.toString() + "\r\n" + b.toString() ); 480 } 481 } 482 } 483 484 /** 485 * Create a new list that is ordered by the starting index of the features' locations. All locations 486 * must be on the same strand of the same sequence. 487 * 488 * @return An ordered list. 489 * @throws IndexOutOfBoundsException Cannot compare/sort features whose locations are on opposite strands, or 490 * whose seqnames differ. 491 */ 492 public FeatureList sortByStart() { 493 FeatureI[] array = toArray(new FeatureI[1]); 494 495 Arrays.sort(array, new FeatureComparator()); 496 497 return new FeatureList(Arrays.asList(array)); 498 } 499 500 /** 501 * Add a list of attributes that will be used as indexes for queries 502 * @param indexes the List containing the attribute_id 503 */ 504 public void addIndexes(List<String> indexes) { 505 for (String index : indexes){ 506 addIndex(index); 507 } 508 509 } 510 /** 511 * Add an attribute that will be used as index for queries 512 * @param index an attribute_id 513 */ 514 public void addIndex(String index) { 515 featindex.put(index, null); 516 } 517}