001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023import org.biojava.nbio.core.sequence.DNASequence;
024
025import java.io.Serializable;
026import java.util.*;
027import java.util.Map.Entry;
028
029
030
031/**
032 * A list of FeatureI objects implemented using a Java ArrayList; corresponds to a GFF file.
033 * This class is implemented entirely using FeatureI objects, so everything here will work
034 * correctly if you choose to implement your own feature class -- there are no dependencies
035 * on JavaGene's native Feature class.
036 *
037 *
038 * @author Hanno Hinsch, Carmelo Foti
039 */
040@SuppressWarnings("serial")
041public class FeatureList extends ArrayList<FeatureI> {
042
043         Map<String, Map<String,List<FeatureI>>> featindex = new HashMap<String,Map<String,List<FeatureI>>>();
044        Location mLocation;                     //genomic location (union of feature locations)
045
046        /**
047         * Construct an empty list.
048         */
049        public FeatureList() {
050                mLocation = null;
051        }
052
053        /**
054         * Construct a new list containing the same features
055         * as the specified list.
056         *
057         * @param features An existing list or collection of FeatureI objects.
058         */
059        public FeatureList(Collection<FeatureI> features) {
060                this();
061                this.add(features);
062
063                mLocation = null;
064        }
065
066        /**
067         * Add specified feature to the end of the list. Updates the bounding location of the
068         * feature list, if needed.
069         *
070         * @param feature The FeatureI object to add.
071         * @return True if the feature was added.
072         */
073        @Override
074        public boolean add(FeatureI feature) {
075                if (mLocation == null) {
076                        mLocation = feature.location().plus();
077                } else if (null != feature.location()) {
078                        mLocation = mLocation.union(feature.location().plus());
079                }
080                for (Entry<String, String> entry : feature.getAttributes().entrySet()){
081                        if (featindex.containsKey(entry.getKey())){
082                                Map<String,List<FeatureI>> feat = featindex.get(entry.getKey());
083                                if (feat==null){
084                                        feat= new HashMap<String,List<FeatureI>>();
085                                }
086                                List<FeatureI> features = feat.get(entry.getValue());
087                                if (features==null){
088                                        features = new ArrayList<FeatureI>();
089                                }
090                                features.add(feature);
091                                feat.put(entry.getValue(), features);
092                                featindex.put(entry.getKey(), feat);
093                                //featindex.put(key, value)
094                        }
095                }
096
097                return super.add(feature);
098        }
099
100        /**
101         * Add all features in the specified list or collection to this list.
102         *
103         * @param list The collection of FeatureI objects.
104         */
105        public void add(Collection<FeatureI> list) {
106                for (FeatureI f : list) {
107                        add(f);
108                }
109        }
110
111        /**
112         * The union of all locations of all features in this list, mapped to the positive strand.
113         * If an added feature is on the negative strand, its positive strand image is added
114         * to the union.
115         * The bounding location is not updated when a feature is removed from the list, so
116         * it is not guaranteed to be the minimal bounding location.
117         *
118         * @return A location that is the union of all feature locations in the list.
119         */
120        public Location bounds() {
121                return mLocation;
122        }
123
124        /**
125         * Check size of gaps between successive features in list. The features in
126         * the list are assumed to be appropriately ordered.
127         *
128         * @param gapLength The minimum gap length to consider. Use a gapLength
129         * of 0 to check if features are contiguous.
130         * @return True if list has any gaps equal to or greater than gapLength.
131         */
132        public boolean hasGaps(int gapLength) {
133                Location last = null;
134                for (FeatureI f : this) {
135                        if (last != null && gapLength <= f.location().distance(last)) {
136                                return true;
137                        } else {
138                                last = f.location();
139                        }
140                }
141
142                return false;
143        }
144
145
146
147
148        /**
149         * Concatenate successive portions of the specified sequence
150         * using the feature locations in the list. The list is assumed to be appropriately
151         * ordered.
152         *
153         * @param sequence The source sequence from which portions should be selected.
154         * @return The spliced data.
155         * @throws IllegalStateException Out of order or overlapping FeatureI locations detected.
156         *
157         */
158        public String splice(DNASequence sequence) {
159                StringBuilder subData = new StringBuilder();
160                Location last = null;
161
162                for (FeatureI f : this) {
163                        Location loc = f.location();
164
165                        if (last == null || loc.startsAfter(last)) {
166                                subData.append(sequence.getSubSequence(loc.start(), loc.end()).toString());
167                                last = loc;
168                        } else {
169                                throw new IllegalStateException("Splice: Feature locations should not overlap.");
170                        }
171
172                }
173
174                return subData.toString();
175        }
176
177        /**
178         * Create a collection of all unique group ids in the list, as defined
179         * by the group() method of the features. For example, if the
180         * features are from a GFF1 file, then each group id identifies a particular gene,
181         * and this method returns a collection of all gene ids.
182         *
183         * @return A collection (suitable for iteration using Java's "for" loop) of all the
184         * group ids found in this list. The order of the values is undefined; it will not match
185         * the order of features in the list.
186         */
187        public Collection<String> groupValues() {
188                Set<String> set = new HashSet<String>();
189                for (FeatureI f : this) {
190                        //enter in a set -- removes duplicates
191                        set.add(f.group());
192                }
193
194                return set;
195        }
196
197        /**
198         * Create a collection of the unique values for the specified key.
199         * Example: For GTF files, using the "gene_id" key will give the names of all
200         * the genes in this list.
201         *
202         * @return A collection (suitable for iteration using Java's "for" loop) of all the
203         * values found for this key. The order of the values is undefined; it will not match
204         * the order of features in the list.
205         */
206        public Collection<String> attributeValues(String key) {
207                if (featindex.containsKey(key)){
208                        Map<String, List<FeatureI>> map = featindex.get(key);
209                        Collection<String> result = map.keySet();
210                        if (result == null) result = new HashSet<String>();
211                        return Collections.unmodifiableCollection(result);
212                }
213                LinkedHashMap<String, String> hash = new LinkedHashMap<String, String>();
214                for (FeatureI f : this) {
215                        //enter as a key -- removes duplicates
216                        hash.put(f.getAttribute(key), null);
217                }
218
219                return Collections.unmodifiableCollection(hash.keySet());
220        }
221
222        /**
223         * Create a list of all features that have the specified group id, as defined by
224         * the group() method of the features.
225         *
226         * @param groupid The group to match.
227         * @return A list of features having the specified group id.
228         */
229        public FeatureList selectByGroup(String groupid) {
230                FeatureList list = new FeatureList();
231                for (FeatureI f : this) {
232                        if (f.group().equals(groupid)) {
233                                list.add(f);
234                        }
235                }
236
237                return list;
238        }
239
240        /**
241         * Create a list of all features that are of the specified type, as defined by
242         * the type() method of the features.
243         * This might be, for example, "exon" or "CDS".
244         *
245         * @param type The type to match.
246         * @return A list of features of the specified type.
247         */
248        public FeatureList selectByType(String type) {
249                FeatureList list = new FeatureList();
250                for (FeatureI f : this) {
251                        if (f.type().equals(type)) {
252                                list.add(f);
253                        }
254                }
255
256                return list;
257        }
258
259        /**
260         * Create a list of all features that include the specified attribute key/value pair.
261         * This method now properly supports adding the index before or after adding the features.
262         * Adding features, then then index, then more features is still not supported.
263         *
264         * @param key The key to consider.
265         * @param value The value to consider.
266         * @return A list of features that include the key/value pair.
267         */
268        public FeatureList selectByAttribute(String key, String value) {
269                if (featindex.containsKey(key)){
270                        Map<String,List<FeatureI>> featuresmap = featindex.get(key);
271                        if (featuresmap==null) return new FeatureList();
272                        List<FeatureI> list = featuresmap.get(value);
273                        if (list == null){
274                                return new FeatureList();
275                        }
276                        return  new FeatureList(list);
277                }
278                FeatureList list = new FeatureList();
279                for (FeatureI f : this) {
280                        if (f.hasAttribute(key, value)) {
281                                list.add(f);
282                        }
283                }
284                return list;
285        }
286        /**
287         * Create a list of all features that include the specified attribute key.
288         *
289         * @param key The key to consider.
290         * @return A list of features that include the key.
291         */
292        public FeatureList selectByAttribute(String key) {
293                FeatureList list = new FeatureList();
294                if (featindex.containsKey(key)){
295                        Map<String, List<FeatureI>> featsmap =featindex.get(key);
296                        if(null != featsmap) {
297                                for (List<FeatureI> feats: featsmap.values()){
298                                        list.addAll(Collections.unmodifiableCollection(feats));
299                                }
300                                return list;
301                        }
302                }
303
304                for (FeatureI f : this) {
305                        if (f.hasAttribute(key)) {
306                                list.add(f);
307                        }
308                }
309                return list;
310        }
311
312        /**
313         * Create a list of all features that include the specified key/value pair in their userMap().
314         *
315         * @param key The key to consider.
316         * @param value The value to consider.
317         * @return A list of features that include the key/value pair.
318         */
319        public FeatureList selectByUserData(String key, Object value) {
320                FeatureList list = new FeatureList();
321                for (FeatureI f : this) {
322                        Object o = f.userData().get(key);
323                        if (o != null && o.equals(value)) {
324                                list.add(f);
325                        }
326                }
327                return list;
328        }
329        /**
330         * Create a list of all features that include the specified key in their userMap().
331         *
332         * @param key The key to consider.
333         * @return A list of features that include the key.
334         */
335        public FeatureList selectByUserData(String key) {
336                FeatureList list = new FeatureList();
337                for (FeatureI f : this) {
338                        if (f.userData().containsKey(key)) {
339                                list.add(f);
340                        }
341                }
342                return list;
343        }
344
345        /**
346         * Create a list of all features that overlap the specified location on the specified
347         * sequence.
348         *
349         * @param seqname The sequence name. Only features with this sequence name will be checked for overlap.
350         * @param location The location to check.
351         * @param useBothStrands If true, locations are mapped to their positive strand image
352         * before being checked for overlap. If false, only features whose locations are
353         * on the same strand as the specified location will be considered for inclusion.
354         * @return The new list of features that overlap the location.
355         */
356        public FeatureList selectOverlapping(String seqname, Location location, boolean useBothStrands)
357                        throws Exception {
358                FeatureList list = new FeatureList();
359
360                for (FeatureI feature : this) {
361                        boolean overlaps = false;
362                        if (feature.seqname().equals(seqname)) {
363                                if (location.isSameStrand(feature.location())) {
364                                        overlaps = feature.location().overlaps(location);
365                                } else if (useBothStrands) {
366                                        overlaps = feature.location().overlaps(location.opposite());
367                                }
368                        }
369                        if (overlaps) {
370                                list.add(feature);
371                        }
372                }
373                return list;
374        }
375
376        /**
377         * Create a list of all features that do not overlap the specified location on the specified sequence.
378         *
379         * @param seqname The sequence name. Only features with this sequence name will be checked for overlap.
380         * @param location The location to check.
381         * @param useBothStrands If true, locations are mapped to their positive strand image
382         * before being checked for overlap. If false, all features whose locations are
383         * on the opposite strand from the specified location will be considered non-overlapping.
384         * @return The new list of features that do not overlap the location.
385         */
386        public FeatureList omitOverlapping(String seqname, Location location, boolean useBothStrands) {
387                FeatureList list = new FeatureList();
388
389                for (FeatureI feature : this) {
390                        boolean overlaps = false;
391                        if (feature.seqname().equals(seqname)) {
392                                if (location.isSameStrand(feature.location())) {
393                                        overlaps = feature.location().overlaps(location);
394                                } else if (useBothStrands) {
395                                        overlaps = feature.location().overlaps(location.opposite());
396                                }
397                        }
398
399                        if (!overlaps) {
400                                list.add(feature);
401                        }
402                }
403
404                return list;
405        }
406
407        /**
408         * Check if any feature in list has the specified attribute key.
409         *
410         * @param key The attribute key to consider.
411         * @return True if at least one feature has the attribute key.
412         */
413        public boolean hasAttribute(String key) {
414                if (featindex.containsKey(key)){
415                        Map<String, List<FeatureI>> mappa = featindex.get(key);
416                        if (mappa!= null && mappa.size()>0)return true;
417                        return false;
418                }
419                for (FeatureI f : this) {
420                        if (f.hasAttribute(key)) {
421                                return true;
422                        }
423                }
424
425                return false;
426        }
427
428        /**
429         * Check if any feature in list has the specified attribute key/value pair.
430         *
431         * @param key The attribute key to consider.
432         * @param value The attribute value to consider.
433         * @return True if at least one feature has the key/value pair.
434         */
435        public boolean hasAttribute(String key, String value) {
436                if (featindex.containsKey(key)){
437                        Map<String, List<FeatureI>> mappa = featindex.get(key);
438                        if (mappa == null) return false;
439                        if (mappa.containsKey(value)) return true;
440                        return false;
441                }
442
443                for (FeatureI f : this) {
444                        if (f.hasAttribute(key, value)) {
445                                return true;
446                        }
447                }
448
449                return false;
450        }
451
452        /**
453         * Return a string representation of all features in this list.
454         *
455         * @return A string.
456         */
457        @Override
458        public String toString() {
459                StringBuilder  s = new StringBuilder("FeatureList: >>\n");
460                for (FeatureI f : this) {
461                        s.append( f.seqname() + ":" + f.toString() + "\n");
462                }
463
464                s.append("\n<<\n");
465                return s.toString();
466        }
467
468        /**
469         * used by sort routine
470         */
471        private class FeatureComparator implements Comparator<FeatureI>, Serializable {
472        private static final long serialVersionUID = 1;
473
474                @Override
475                public int compare(FeatureI a, FeatureI b) {
476                        if (a.seqname().equals(b.seqname()) && a.location().isSameStrand(b.location())) {
477                                return a.location().start() - b.location().start();             //sort on start
478                        } else {
479                                throw new IndexOutOfBoundsException("Cannot compare/sort features whose locations are on opposite strands or with different seqname().\r\n" + a.toString() + "\r\n" + b.toString() );
480                        }
481                }
482        }
483
484        /**
485         * Create a new list that is ordered by the starting index of the features' locations. All locations
486         * must be on the same strand of the same sequence.
487         *
488         * @return An ordered list.
489         * @throws IndexOutOfBoundsException Cannot compare/sort features whose locations are on opposite strands, or
490         * whose seqnames differ.
491         */
492        public FeatureList sortByStart() {
493                FeatureI[] array = toArray(new FeatureI[1]);
494
495                Arrays.sort(array, new FeatureComparator());
496
497                return new FeatureList(Arrays.asList(array));
498        }
499
500        /**
501         * Add a list of attributes that will be used as indexes for queries
502         * @param indexes  the List containing the attribute_id
503         */
504        public void addIndexes(List<String> indexes) {
505                for (String index : indexes){
506                        addIndex(index);
507                }
508
509        }
510        /**
511         * Add an attribute that will be used as index for queries
512         * @param index an attribute_id
513         */
514        public void addIndex(String index) {
515                featindex.put(index, null);
516        }
517}