001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.genome.parsers.gff; 022 023import org.biojava.nbio.core.sequence.DNASequence; 024 025import java.util.*; 026import java.util.Map.Entry; 027 028 029 030/** 031 * A list of FeatureI objects implemented using a Java ArrayList; corresponds to a GFF file. 032 * This class is implemented entirely using FeatureI objects, so everything here will work 033 * correctly if you choose to implement your own feature class -- there are no dependencies 034 * on JavaGene's native Feature class. 035 * 036 * 037 * @author Hanno Hinsch, Carmelo Foti 038 */ 039@SuppressWarnings("serial") 040public class FeatureList extends ArrayList<FeatureI> { 041 042 Map<String, Map<String,List<FeatureI>>> featindex = new HashMap<String,Map<String,List<FeatureI>>>(); 043 Location mLocation; //genomic location (union of feature locations) 044 045 /** 046 * Construct an empty list. 047 */ 048 public FeatureList() { 049 mLocation = null; 050 } 051 052 /** 053 * Construct a new list containing the same features 054 * as the specified list. 055 * 056 * @param features An existing list or collection of FeatureI objects. 057 */ 058 public FeatureList(Collection<FeatureI> features) { 059 this(); 060 this.add(features); 061 062 mLocation = null; 063 } 064 065 /** 066 * Add specified feature to the end of the list. Updates the bounding location of the 067 * feature list, if needed. 068 * 069 * @param feature The FeatureI object to add. 070 * @return True if the feature was added. 071 */ 072 @Override 073 public boolean add(FeatureI feature) { 074 if (mLocation == null) { 075 mLocation = feature.location().plus(); 076 } else if (null != feature.location()) { 077 mLocation = mLocation.union(feature.location().plus()); 078 } 079 for (Entry<String, String> entry : feature.getAttributes().entrySet()){ 080 if (featindex.containsKey(entry.getKey())){ 081 Map<String,List<FeatureI>> feat = featindex.get(entry.getKey()); 082 if (feat==null){ 083 feat= new HashMap<String,List<FeatureI>>(); 084 } 085 List<FeatureI> features = feat.get(entry.getValue()); 086 if (features==null){ 087 features = new ArrayList<FeatureI>(); 088 } 089 features.add(feature); 090 feat.put(entry.getValue(), features); 091 featindex.put(entry.getKey(), feat); 092 //featindex.put(key, value) 093 } 094 } 095 096 return super.add(feature); 097 } 098 099 /** 100 * Add all features in the specified list or collection to this list. 101 * 102 * @param list The collection of FeatureI objects. 103 */ 104 public void add(Collection<FeatureI> list) { 105 for (FeatureI f : list) { 106 add(f); 107 } 108 } 109 110 /** 111 * The union of all locations of all features in this list, mapped to the positive strand. 112 * If an added feature is on the negative strand, its positive strand image is added 113 * to the union. 114 * The bounding location is not updated when a feature is removed from the list, so 115 * it is not guaranteed to be the minimal bounding location. 116 * 117 * @return A location that is the union of all feature locations in the list. 118 */ 119 public Location bounds() { 120 return mLocation; 121 } 122 123 /** 124 * Check size of gaps between successive features in list. The features in 125 * the list are assumed to be appropriately ordered. 126 * 127 * @param gapLength The minimum gap length to consider. Use a gapLength 128 * of 0 to check if features are contiguous. 129 * @return True if list has any gaps equal to or greater than gapLength. 130 */ 131 public boolean hasGaps(int gapLength) { 132 Location last = null; 133 for (FeatureI f : this) { 134 if (last != null && gapLength <= f.location().distance(last)) { 135 return true; 136 } else { 137 last = f.location(); 138 } 139 } 140 141 return false; 142 } 143 144 145 146 147 /** 148 * Concatenate successive portions of the specified sequence 149 * using the feature locations in the list. The list is assumed to be appropriately 150 * ordered. 151 * 152 * @param sequence The source sequence from which portions should be selected. 153 * @return The spliced data. 154 * @throws IllegalStateException Out of order or overlapping FeatureI locations detected. 155 * 156 */ 157 public String splice(DNASequence sequence) { 158 StringBuilder subData = new StringBuilder(); 159 Location last = null; 160 161 for (FeatureI f : this) { 162 Location loc = f.location(); 163 164 if (last == null || loc.startsAfter(last)) { 165 subData.append(sequence.getSubSequence(loc.start(), loc.end()).toString()); 166 last = loc; 167 } else { 168 throw new IllegalStateException("Splice: Feature locations should not overlap."); 169 } 170 171 } 172 173 return subData.toString(); 174 } 175 176 /** 177 * Create a collection of all unique group ids in the list, as defined 178 * by the group() method of the features. For example, if the 179 * features are from a GFF1 file, then each group id identifies a particular gene, 180 * and this method returns a collection of all gene ids. 181 * 182 * @return A collection (suitable for iteration using Java's "for" loop) of all the 183 * group ids found in this list. The order of the values is undefined; it will not match 184 * the order of features in the list. 185 */ 186 public Collection<String> groupValues() { 187 Set<String> set = new HashSet<String>(); 188 for (FeatureI f : this) { 189 //enter in a set -- removes duplicates 190 set.add(f.group()); 191 } 192 193 return set; 194 } 195 196 /** 197 * Create a collection of the unique values for the specified key. 198 * Example: For GTF files, using the "gene_id" key will give the names of all 199 * the genes in this list. 200 * 201 * @return A collection (suitable for iteration using Java's "for" loop) of all the 202 * values found for this key. The order of the values is undefined; it will not match 203 * the order of features in the list. 204 */ 205 public Collection<String> attributeValues(String key) { 206 if (featindex.containsKey(key)){ 207 Map<String, List<FeatureI>> map = featindex.get(key); 208 Collection<String> result = map.keySet(); 209 if (result == null) result = new HashSet<String>(); 210 return Collections.unmodifiableCollection(result); 211 } 212 LinkedHashMap<String, String> hash = new LinkedHashMap<String, String>(); 213 for (FeatureI f : this) { 214 //enter as a key -- removes duplicates 215 hash.put(f.getAttribute(key), null); 216 } 217 218 return Collections.unmodifiableCollection(hash.keySet()); 219 } 220 221 /** 222 * Create a list of all features that have the specified group id, as defined by 223 * the group() method of the features. 224 * 225 * @param groupid The group to match. 226 * @return A list of features having the specified group id. 227 */ 228 public FeatureList selectByGroup(String groupid) { 229 FeatureList list = new FeatureList(); 230 for (FeatureI f : this) { 231 if (f.group().equals(groupid)) { 232 list.add(f); 233 } 234 } 235 236 return list; 237 } 238 239 /** 240 * Create a list of all features that are of the specified type, as defined by 241 * the type() method of the features. 242 * This might be, for example, "exon" or "CDS". 243 * 244 * @param type The type to match. 245 * @return A list of features of the specified type. 246 */ 247 public FeatureList selectByType(String type) { 248 FeatureList list = new FeatureList(); 249 for (FeatureI f : this) { 250 if (f.type().equals(type)) { 251 list.add(f); 252 } 253 } 254 255 return list; 256 } 257 258 /** 259 * Create a list of all features that include the specified attribute key/value pair. 260 * This method now properly supports adding the index before or after adding the features. 261 * Adding features, then then index, then more features is still not supported. 262 * 263 * @param key The key to consider. 264 * @param value The value to consider. 265 * @return A list of features that include the key/value pair. 266 */ 267 public FeatureList selectByAttribute(String key, String value) { 268 if (featindex.containsKey(key)){ 269 Map<String,List<FeatureI>> featuresmap = featindex.get(key); 270 if (featuresmap==null) return new FeatureList(); 271 List<FeatureI> list = featuresmap.get(value); 272 if (list == null){ 273 return new FeatureList(); 274 } 275 return new FeatureList(list); 276 } 277 FeatureList list = new FeatureList(); 278 for (FeatureI f : this) { 279 if (f.hasAttribute(key, value)) { 280 list.add(f); 281 } 282 } 283 return list; 284 } 285 /** 286 * Create a list of all features that include the specified attribute key. 287 * 288 * @param key The key to consider. 289 * @return A list of features that include the key. 290 */ 291 public FeatureList selectByAttribute(String key) { 292 FeatureList list = new FeatureList(); 293 if (featindex.containsKey(key)){ 294 Map<String, List<FeatureI>> featsmap =featindex.get(key); 295 if(null != featsmap) { 296 for (List<FeatureI> feats: featsmap.values()){ 297 list.addAll(Collections.unmodifiableCollection(feats)); 298 } 299 return list; 300 } 301 } 302 303 for (FeatureI f : this) { 304 if (f.hasAttribute(key)) { 305 list.add(f); 306 } 307 } 308 return list; 309 } 310 311 /** 312 * Create a list of all features that include the specified key/value pair in their userMap(). 313 * 314 * @param key The key to consider. 315 * @param value The value to consider. 316 * @return A list of features that include the key/value pair. 317 */ 318 public FeatureList selectByUserData(String key, Object value) { 319 FeatureList list = new FeatureList(); 320 for (FeatureI f : this) { 321 Object o = f.userData().get(key); 322 if (o != null && o.equals(value)) { 323 list.add(f); 324 } 325 } 326 return list; 327 } 328 /** 329 * Create a list of all features that include the specified key in their userMap(). 330 * 331 * @param key The key to consider. 332 * @return A list of features that include the key. 333 */ 334 public FeatureList selectByUserData(String key) { 335 FeatureList list = new FeatureList(); 336 for (FeatureI f : this) { 337 if (f.userData().containsKey(key)) { 338 list.add(f); 339 } 340 } 341 return list; 342 } 343 344 /** 345 * Create a list of all features that overlap the specified location on the specified 346 * sequence. 347 * 348 * @param seqname The sequence name. Only features with this sequence name will be checked for overlap. 349 * @param location The location to check. 350 * @param useBothStrands If true, locations are mapped to their positive strand image 351 * before being checked for overlap. If false, only features whose locations are 352 * on the same strand as the specified location will be considered for inclusion. 353 * @return The new list of features that overlap the location. 354 */ 355 public FeatureList selectOverlapping(String seqname, Location location, boolean useBothStrands) 356 throws Exception { 357 FeatureList list = new FeatureList(); 358 359 for (FeatureI feature : this) { 360 boolean overlaps = false; 361 if (feature.seqname().equals(seqname)) { 362 if (location.isSameStrand(feature.location())) { 363 overlaps = feature.location().overlaps(location); 364 } else if (useBothStrands) { 365 overlaps = feature.location().overlaps(location.opposite()); 366 } 367 } 368 if (overlaps) { 369 list.add(feature); 370 } 371 } 372 return list; 373 } 374 375 /** 376 * Create a list of all features that do not overlap the specified location on the specified sequence. 377 * 378 * @param seqname The sequence name. Only features with this sequence name will be checked for overlap. 379 * @param location The location to check. 380 * @param useBothStrands If true, locations are mapped to their positive strand image 381 * before being checked for overlap. If false, all features whose locations are 382 * on the opposite strand from the specified location will be considered non-overlapping. 383 * @return The new list of features that do not overlap the location. 384 */ 385 public FeatureList omitOverlapping(String seqname, Location location, boolean useBothStrands) { 386 FeatureList list = new FeatureList(); 387 388 for (FeatureI feature : this) { 389 boolean overlaps = false; 390 if (feature.seqname().equals(seqname)) { 391 if (location.isSameStrand(feature.location())) { 392 overlaps = feature.location().overlaps(location); 393 } else if (useBothStrands) { 394 overlaps = feature.location().overlaps(location.opposite()); 395 } 396 } 397 398 if (!overlaps) { 399 list.add(feature); 400 } 401 } 402 403 return list; 404 } 405 406 /** 407 * Check if any feature in list has the specified attribute key. 408 * 409 * @param key The attribute key to consider. 410 * @return True if at least one feature has the attribute key. 411 */ 412 public boolean hasAttribute(String key) { 413 if (featindex.containsKey(key)){ 414 Map<String, List<FeatureI>> mappa = featindex.get(key); 415 if (mappa!= null && mappa.size()>0)return true; 416 return false; 417 } 418 for (FeatureI f : this) { 419 if (f.hasAttribute(key)) { 420 return true; 421 } 422 } 423 424 return false; 425 } 426 427 /** 428 * Check if any feature in list has the specified attribute key/value pair. 429 * 430 * @param key The attribute key to consider. 431 * @param value The attribute value to consider. 432 * @return True if at least one feature has the key/value pair. 433 */ 434 public boolean hasAttribute(String key, String value) { 435 if (featindex.containsKey(key)){ 436 Map<String, List<FeatureI>> mappa = featindex.get(key); 437 if (mappa == null) return false; 438 if (mappa.containsKey(value)) return true; 439 return false; 440 } 441 442 for (FeatureI f : this) { 443 if (f.hasAttribute(key, value)) { 444 return true; 445 } 446 } 447 448 return false; 449 } 450 451 /** 452 * Return a string representation of all features in this list. 453 * 454 * @return A string. 455 */ 456 @Override 457 public String toString() { 458 StringBuilder s = new StringBuilder("FeatureList: >>\n"); 459 for (FeatureI f : this) { 460 s.append( f.seqname() + ":" + f.toString() + "\n"); 461 } 462 463 s.append("\n<<\n"); 464 return s.toString(); 465 } 466 467 /** 468 * used by sort routine 469 */ 470 private class FeatureComparator implements Comparator<FeatureI> { 471 472 @Override 473 public int compare(FeatureI a, FeatureI b) { 474 if (a.seqname().equals(b.seqname()) && a.location().isSameStrand(b.location())) { 475 return a.location().start() - b.location().start(); //sort on start 476 } else { 477 throw new IndexOutOfBoundsException("Cannot compare/sort features whose locations are on opposite strands or with different seqname().\r\n" + a.toString() + "\r\n" + b.toString() ); 478 } 479 } 480 } 481 482 /** 483 * Create a new list that is ordered by the starting index of the features' locations. All locations 484 * must be on the same strand of the same sequence. 485 * 486 * @return An ordered list. 487 * @throws IndexOutOfBoundsException Cannot compare/sort features whose locations are on opposite strands, or 488 * whose seqnames differ. 489 */ 490 public FeatureList sortByStart() { 491 FeatureI array[] = toArray(new FeatureI[1]); 492 493 Arrays.sort(array, new FeatureComparator()); 494 495 return new FeatureList(Arrays.asList(array)); 496 } 497 498 /** 499 * @deprecated 500 * 501 */ 502 // FIXME features may have a null location() !! 503 @Deprecated 504 static public void main(String args[]) { 505 } 506 507 508 /** 509 * Add a list of attributes that will be used as indexes for queries 510 * @param indexes the List containing the attribute_id 511 */ 512 public void addIndexes(List<String> indexes) { 513 for (String index : indexes){ 514 addIndex(index); 515 } 516 517 } 518 /** 519 * Add an attribute that will be used as index for queries 520 * @param index an attribute_id 521 */ 522 public void addIndex(String index) { 523 featindex.put(index, null); 524 } 525}