001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023import org.biojava.nbio.core.sequence.DNASequence;
024
025import java.util.*;
026import java.util.Map.Entry;
027
028
029
030/**
031 * A list of FeatureI objects implemented using a Java ArrayList; corresponds to a GFF file.
032 * This class is implemented entirely using FeatureI objects, so everything here will work
033 * correctly if you choose to implement your own feature class -- there are no dependencies
034 * on JavaGene's native Feature class.
035 *
036 *
037 * @author Hanno Hinsch, Carmelo Foti
038 */
039@SuppressWarnings("serial")
040public class FeatureList extends ArrayList<FeatureI> {
041
042         Map<String, Map<String,List<FeatureI>>> featindex = new HashMap<String,Map<String,List<FeatureI>>>();
043        Location mLocation;                     //genomic location (union of feature locations)
044
045        /**
046         * Construct an empty list.
047         */
048        public FeatureList() {
049                mLocation = null;
050        }
051
052        /**
053         * Construct a new list containing the same features
054         * as the specified list.
055         *
056         * @param features An existing list or collection of FeatureI objects.
057         */
058        public FeatureList(Collection<FeatureI> features) {
059                this();
060                this.add(features);
061
062                mLocation = null;
063        }
064
065        /**
066         * Add specified feature to the end of the list. Updates the bounding location of the
067         * feature list, if needed.
068         *
069         * @param feature The FeatureI object to add.
070         * @return True if the feature was added.
071         */
072        @Override
073        public boolean add(FeatureI feature) {
074                if (mLocation == null) {
075                        mLocation = feature.location().plus();
076                } else if (null != feature.location()) {
077                        mLocation = mLocation.union(feature.location().plus());
078                }
079                for (Entry<String, String> entry : feature.getAttributes().entrySet()){
080                        if (featindex.containsKey(entry.getKey())){
081                                Map<String,List<FeatureI>> feat = featindex.get(entry.getKey());
082                                if (feat==null){
083                                        feat= new HashMap<String,List<FeatureI>>();
084                                }
085                                List<FeatureI> features = feat.get(entry.getValue());
086                                if (features==null){
087                                        features = new ArrayList<FeatureI>();
088                                }
089                                features.add(feature);
090                                feat.put(entry.getValue(), features);
091                                featindex.put(entry.getKey(), feat);
092                                //featindex.put(key, value)
093                        }
094                }
095
096                return super.add(feature);
097        }
098
099        /**
100         * Add all features in the specified list or collection to this list.
101         *
102         * @param list The collection of FeatureI objects.
103         */
104        public void add(Collection<FeatureI> list) {
105                for (FeatureI f : list) {
106                        add(f);
107                }
108        }
109
110        /**
111         * The union of all locations of all features in this list, mapped to the positive strand.
112         * If an added feature is on the negative strand, its positive strand image is added
113         * to the union.
114         * The bounding location is not updated when a feature is removed from the list, so
115         * it is not guaranteed to be the minimal bounding location.
116         *
117         * @return A location that is the union of all feature locations in the list.
118         */
119        public Location bounds() {
120                return mLocation;
121        }
122
123        /**
124         * Check size of gaps between successive features in list. The features in
125         * the list are assumed to be appropriately ordered.
126         *
127         * @param gapLength The minimum gap length to consider. Use a gapLength
128         * of 0 to check if features are contiguous.
129         * @return True if list has any gaps equal to or greater than gapLength.
130         */
131        public boolean hasGaps(int gapLength) {
132                Location last = null;
133                for (FeatureI f : this) {
134                        if (last != null && gapLength <= f.location().distance(last)) {
135                                return true;
136                        } else {
137                                last = f.location();
138                        }
139                }
140
141                return false;
142        }
143
144
145
146
147        /**
148         * Concatenate successive portions of the specified sequence
149         * using the feature locations in the list. The list is assumed to be appropriately
150         * ordered.
151         *
152         * @param sequence The source sequence from which portions should be selected.
153         * @return The spliced data.
154         * @throws IllegalStateException Out of order or overlapping FeatureI locations detected.
155         *
156         */
157        public String splice(DNASequence sequence) {
158                StringBuilder subData = new StringBuilder();
159                Location last = null;
160
161                for (FeatureI f : this) {
162                        Location loc = f.location();
163
164                        if (last == null || loc.startsAfter(last)) {
165                                subData.append(sequence.getSubSequence(loc.start(), loc.end()).toString());
166                                last = loc;
167                        } else {
168                                throw new IllegalStateException("Splice: Feature locations should not overlap.");
169                        }
170
171                }
172
173                return subData.toString();
174        }
175
176        /**
177         * Create a collection of all unique group ids in the list, as defined
178         * by the group() method of the features. For example, if the
179         * features are from a GFF1 file, then each group id identifies a particular gene,
180         * and this method returns a collection of all gene ids.
181         *
182         * @return A collection (suitable for iteration using Java's "for" loop) of all the
183         * group ids found in this list. The order of the values is undefined; it will not match
184         * the order of features in the list.
185         */
186        public Collection<String> groupValues() {
187                Set<String> set = new HashSet<String>();
188                for (FeatureI f : this) {
189                        //enter in a set -- removes duplicates
190                        set.add(f.group());
191                }
192
193                return set;
194        }
195
196        /**
197         * Create a collection of the unique values for the specified key.
198         * Example: For GTF files, using the "gene_id" key will give the names of all
199         * the genes in this list.
200         *
201         * @return A collection (suitable for iteration using Java's "for" loop) of all the
202         * values found for this key. The order of the values is undefined; it will not match
203         * the order of features in the list.
204         */
205        public Collection<String> attributeValues(String key) {
206                if (featindex.containsKey(key)){
207                        Map<String, List<FeatureI>> map = featindex.get(key);
208                        Collection<String> result = map.keySet();
209                        if (result == null) result = new HashSet<String>();
210                        return Collections.unmodifiableCollection(result);
211                }
212                LinkedHashMap<String, String> hash = new LinkedHashMap<String, String>();
213                for (FeatureI f : this) {
214                        //enter as a key -- removes duplicates
215                        hash.put(f.getAttribute(key), null);
216                }
217
218                return Collections.unmodifiableCollection(hash.keySet());
219        }
220
221        /**
222         * Create a list of all features that have the specified group id, as defined by
223         * the group() method of the features.
224         *
225         * @param groupid The group to match.
226         * @return A list of features having the specified group id.
227         */
228        public FeatureList selectByGroup(String groupid) {
229                FeatureList list = new FeatureList();
230                for (FeatureI f : this) {
231                        if (f.group().equals(groupid)) {
232                                list.add(f);
233                        }
234                }
235
236                return list;
237        }
238
239        /**
240         * Create a list of all features that are of the specified type, as defined by
241         * the type() method of the features.
242         * This might be, for example, "exon" or "CDS".
243         *
244         * @param type The type to match.
245         * @return A list of features of the specified type.
246         */
247        public FeatureList selectByType(String type) {
248                FeatureList list = new FeatureList();
249                for (FeatureI f : this) {
250                        if (f.type().equals(type)) {
251                                list.add(f);
252                        }
253                }
254
255                return list;
256        }
257
258        /**
259         * Create a list of all features that include the specified attribute key/value pair.
260         * This method now properly supports adding the index before or after adding the features.
261         * Adding features, then then index, then more features is still not supported.
262         *
263         * @param key The key to consider.
264         * @param value The value to consider.
265         * @return A list of features that include the key/value pair.
266         */
267        public FeatureList selectByAttribute(String key, String value) {
268                if (featindex.containsKey(key)){
269                        Map<String,List<FeatureI>> featuresmap = featindex.get(key);
270                        if (featuresmap==null) return new FeatureList();
271                        List<FeatureI> list = featuresmap.get(value);
272                        if (list == null){
273                                return new FeatureList();
274                        }
275                        return  new FeatureList(list);
276                }
277                FeatureList list = new FeatureList();
278                for (FeatureI f : this) {
279                        if (f.hasAttribute(key, value)) {
280                                list.add(f);
281                        }
282                }
283                return list;
284        }
285        /**
286         * Create a list of all features that include the specified attribute key.
287         *
288         * @param key The key to consider.
289         * @return A list of features that include the key.
290         */
291        public FeatureList selectByAttribute(String key) {
292                FeatureList list = new FeatureList();
293                if (featindex.containsKey(key)){
294                        Map<String, List<FeatureI>> featsmap =featindex.get(key);
295                        if(null != featsmap) {
296                                for (List<FeatureI> feats: featsmap.values()){
297                                        list.addAll(Collections.unmodifiableCollection(feats));
298                                }
299                                return list;
300                        }
301                }
302
303                for (FeatureI f : this) {
304                        if (f.hasAttribute(key)) {
305                                list.add(f);
306                        }
307                }
308                return list;
309        }
310
311        /**
312         * Create a list of all features that include the specified key/value pair in their userMap().
313         *
314         * @param key The key to consider.
315         * @param value The value to consider.
316         * @return A list of features that include the key/value pair.
317         */
318        public FeatureList selectByUserData(String key, Object value) {
319                FeatureList list = new FeatureList();
320                for (FeatureI f : this) {
321                        Object o = f.userData().get(key);
322                        if (o != null && o.equals(value)) {
323                                list.add(f);
324                        }
325                }
326                return list;
327        }
328        /**
329         * Create a list of all features that include the specified key in their userMap().
330         *
331         * @param key The key to consider.
332         * @return A list of features that include the key.
333         */
334        public FeatureList selectByUserData(String key) {
335                FeatureList list = new FeatureList();
336                for (FeatureI f : this) {
337                        if (f.userData().containsKey(key)) {
338                                list.add(f);
339                        }
340                }
341                return list;
342        }
343
344        /**
345         * Create a list of all features that overlap the specified location on the specified
346         * sequence.
347         *
348         * @param seqname The sequence name. Only features with this sequence name will be checked for overlap.
349         * @param location The location to check.
350         * @param useBothStrands If true, locations are mapped to their positive strand image
351         * before being checked for overlap. If false, only features whose locations are
352         * on the same strand as the specified location will be considered for inclusion.
353         * @return The new list of features that overlap the location.
354         */
355        public FeatureList selectOverlapping(String seqname, Location location, boolean useBothStrands)
356                        throws Exception {
357                FeatureList list = new FeatureList();
358
359                for (FeatureI feature : this) {
360                        boolean overlaps = false;
361                        if (feature.seqname().equals(seqname)) {
362                                if (location.isSameStrand(feature.location())) {
363                                        overlaps = feature.location().overlaps(location);
364                                } else if (useBothStrands) {
365                                        overlaps = feature.location().overlaps(location.opposite());
366                                }
367                        }
368                        if (overlaps) {
369                                list.add(feature);
370                        }
371                }
372                return list;
373        }
374
375        /**
376         * Create a list of all features that do not overlap the specified location on the specified sequence.
377         *
378         * @param seqname The sequence name. Only features with this sequence name will be checked for overlap.
379         * @param location The location to check.
380         * @param useBothStrands If true, locations are mapped to their positive strand image
381         * before being checked for overlap. If false, all features whose locations are
382         * on the opposite strand from the specified location will be considered non-overlapping.
383         * @return The new list of features that do not overlap the location.
384         */
385        public FeatureList omitOverlapping(String seqname, Location location, boolean useBothStrands) {
386                FeatureList list = new FeatureList();
387
388                for (FeatureI feature : this) {
389                        boolean overlaps = false;
390                        if (feature.seqname().equals(seqname)) {
391                                if (location.isSameStrand(feature.location())) {
392                                        overlaps = feature.location().overlaps(location);
393                                } else if (useBothStrands) {
394                                        overlaps = feature.location().overlaps(location.opposite());
395                                }
396                        }
397
398                        if (!overlaps) {
399                                list.add(feature);
400                        }
401                }
402
403                return list;
404        }
405
406        /**
407         * Check if any feature in list has the specified attribute key.
408         *
409         * @param key The attribute key to consider.
410         * @return True if at least one feature has the attribute key.
411         */
412        public boolean hasAttribute(String key) {
413                if (featindex.containsKey(key)){
414                        Map<String, List<FeatureI>> mappa = featindex.get(key);
415                        if (mappa!= null && mappa.size()>0)return true;
416                        return false;
417                }
418                for (FeatureI f : this) {
419                        if (f.hasAttribute(key)) {
420                                return true;
421                        }
422                }
423
424                return false;
425        }
426
427        /**
428         * Check if any feature in list has the specified attribute key/value pair.
429         *
430         * @param key The attribute key to consider.
431         * @param value The attribute value to consider.
432         * @return True if at least one feature has the key/value pair.
433         */
434        public boolean hasAttribute(String key, String value) {
435                if (featindex.containsKey(key)){
436                        Map<String, List<FeatureI>> mappa = featindex.get(key);
437                        if (mappa == null) return false;
438                        if (mappa.containsKey(value)) return true;
439                        return false;
440                }
441
442                for (FeatureI f : this) {
443                        if (f.hasAttribute(key, value)) {
444                                return true;
445                        }
446                }
447
448                return false;
449        }
450
451        /**
452         * Return a string representation of all features in this list.
453         *
454         * @return A string.
455         */
456        @Override
457        public String toString() {
458                StringBuilder  s = new StringBuilder("FeatureList: >>\n");
459                for (FeatureI f : this) {
460                        s.append( f.seqname() + ":" + f.toString() + "\n");
461                }
462
463                s.append("\n<<\n");
464                return s.toString();
465        }
466
467        /**
468         * used by sort routine
469         */
470        private class FeatureComparator implements Comparator<FeatureI> {
471
472                @Override
473                public int compare(FeatureI a, FeatureI b) {
474                        if (a.seqname().equals(b.seqname()) && a.location().isSameStrand(b.location())) {
475                                return a.location().start() - b.location().start();             //sort on start
476                        } else {
477                                throw new IndexOutOfBoundsException("Cannot compare/sort features whose locations are on opposite strands or with different seqname().\r\n" + a.toString() + "\r\n" + b.toString() );
478                        }
479                }
480        }
481
482        /**
483         * Create a new list that is ordered by the starting index of the features' locations. All locations
484         * must be on the same strand of the same sequence.
485         *
486         * @return An ordered list.
487         * @throws IndexOutOfBoundsException Cannot compare/sort features whose locations are on opposite strands, or
488         * whose seqnames differ.
489         */
490        public FeatureList sortByStart() {
491                FeatureI array[] = toArray(new FeatureI[1]);
492
493                Arrays.sort(array, new FeatureComparator());
494
495                return new FeatureList(Arrays.asList(array));
496        }
497
498        /**
499         * @deprecated
500         *
501         */
502        // FIXME features may have a null location() !!
503        @Deprecated
504        static public void main(String args[]) {
505        }
506
507
508        /**
509         * Add a list of attributes that will be used as indexes for queries
510         * @param indexes  the List containing the attribute_id
511         */
512        public void addIndexes(List<String> indexes) {
513                for (String index : indexes){
514                        addIndex(index);
515                }
516
517        }
518        /**
519         * Add an attribute that will be used as index for queries
520         * @param index an attribute_id
521         */
522        public void addIndex(String index) {
523                featindex.put(index, null);
524        }
525}