001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.parsers.gff;
022
023import java.util.HashMap;
024
025
026/**
027 * A Feature corresponds to a single row in a GFF file.
028 *
029 * @author Hanno Hinsch
030 */
031public class Feature implements FeatureI {
032
033        private Location mLocation;
034        private String mSeqname;
035        private String mSource;
036        private String mType;
037        private double mScore;                  //or . if none
038        private int mFrame;                             //0,1,2
039        private String mAttributes;                     //any trailing stuff
040        private HashMap<String, String> mUserMap;
041
042        /**
043         * Get the sequence name. (GFF field 1). Note that feature objects have
044         * no link or reference to the actual sequence object to which
045         * they refer; they are completely uncoupled.
046         *
047         * @return Sequence name.
048         */
049        @Override
050        public String seqname() {
051                return mSeqname;
052        }
053
054        ;
055
056        /**
057         * Get source (aka method). (GFF field 2). This is often the name of
058         * the program or procedure that created the features.
059         *
060         * @return Source field.
061         */
062        public String source() {
063                return mSource;
064        }
065
066        ;
067
068        /**
069         * Get feature type, such as "exon" or "CDS". (GFF field 3).
070         *
071         * @return Feature type.
072         */
073        @Override
074        public String type() {
075                return mType;
076        }
077
078        ;
079
080        /**
081         * Get location of feature. Note that feature objects have
082         * no link or reference to the actual sequence object to which
083         * they refer; they are completely uncoupled.
084         *
085         * @return Location of feature.
086         */
087        @Override
088        public Location location() {
089                return mLocation;
090        }
091
092        /**
093         * Get score. (GFF field 7). The meaning of the score varies from file to file.
094         *
095         * @return Score value.
096         */
097        public double score() {
098                return mScore;
099        }
100
101        ;
102
103        /**
104         * Get frame (aka phase). (GFF field 8). Specifies the offset of the
105         * first nucleotide of the first in-frame codon, assuming this feature
106         * is a dna/rna sequence that codes
107         * for a protein. If you
108         * intend to use this field, you probably want to look it up on the web first.
109         *
110         * @return The frame (0, 1, 2).
111         */
112        public int frame() {
113                return mFrame;
114        }
115
116        ;
117
118        /**
119         * Get the string of key/value attributes. (GFF field 9). The format and
120         * meaning of this field varies from flavor to flavor of GFF/GTF. This method
121         * simply returns the whole string. Other methods in this class make assumptions
122         * about its format and provide additional utility.
123         *
124         * @return The attribute string.
125         */
126        public String attributes() {
127                return mAttributes;
128        }
129
130        ;
131
132        @SuppressWarnings("unused")
133        private Feature() {
134        }
135
136        ;        //unavailable
137
138        /**
139         * Make a copy of the specified feature. The mappings in the userMap() HashMap
140         * are copied, so each feature has independent user data. Note, however, that the
141         * actual objects in the HashMap are shared (not copied), so a change to such an object may
142         * affect multiple features.
143         *
144         * @param feature Feature to clone.
145         */
146        public Feature(Feature feature) {
147
148                mSeqname = feature.mSeqname;
149                mSource = feature.mSource;
150                mType = feature.mType;
151                mLocation = feature.mLocation;
152                mScore = feature.mScore;
153                mFrame = feature.mFrame;
154                mAttributes = feature.mAttributes;
155                initAttributeHashMap();
156                mUserMap = new HashMap<String, String>(feature.mUserMap);
157        }
158
159        /**
160         * Construct a new Feature from raw data (usually a GFF row).
161         *
162         * @param seqname The sequence name field (field 1).
163         * @param source The source or method field (field 2).
164         * @param type The type of feature field (field 3).
165         * @param location The location of the feature. (calculated from GFF start, end and strand fields).
166         * @param score The score field (field 7).
167         * @param frame The frame or phase field (field 8).
168         * @param attributes A string of key/value pairs separated by semicolons (field 9).
169         */
170        public Feature(String seqname, String source, String type, Location location, Double score, int frame, String attributes) {
171
172                mSeqname = seqname;
173                mSource = source;
174                mType = type;
175                mLocation = location;
176                mScore = score;
177                mFrame = frame;
178                mAttributes = attributes;
179                initAttributeHashMap();
180                mUserMap = new HashMap<String, String>();
181
182        }
183
184        /**
185         * Get HashMap of user data. Each Feature object has a Java HashMap object
186         * which can be used to annotate the Feature. JavaGene does not use or interpret
187         * the keys or values. The values can be any subtype of the Java Object class.
188         *<br><br>
189         * If a Feature is constructed from data fields, the initial HashMap has no mappings (is empty).
190         * If a Feature is constructed from another Feature, a copy of the mappings is made.
191         * Note that the Objects in the copied mapping are shared, even though the mapping itself
192         * is copied (not shared). Thus removing or adding a mapping to one Feature will not affect the
193         * other, but changing an Object which is part of an established mapping may affect both Features.
194         *
195         * @return The user HashMap.
196         */
197        @Override
198        public HashMap<String, String> userData() {
199                return mUserMap;
200        }
201
202         HashMap<String,String> attributeHashMap = new HashMap<String,String>();
203
204        private void initAttributeHashMap(){
205           String[] values = mAttributes.split(";");
206           for(String attribute : values){
207                   attribute = attribute.trim();
208                   int equalindex = attribute.indexOf("=");
209                   String splitData = "=";
210                   if(equalindex == -1) //gtf uses space and gff3 uses =
211                           splitData = " ";
212                   String[] data = attribute.split(splitData);
213                   String value = "";
214                   if(data.length >= 2 && data[1].indexOf('"') != -1){ // an attibute field could be empty
215                           value = data[1].replaceAll('"' + "","").trim();
216                   }else if(data.length >= 2){
217                           value = data[1].trim();
218                   }
219                   attributeHashMap.put(data[0].trim(), value);
220           }
221        }
222
223        /**
224         * Get value of specified attribute key. Returns null if the attribute key has no value (does not exist).
225         * Keys are case-sensitive. Assumes attributes are correctly formatted in GFF style.
226         * Known bug: a semicolon within a quoted value will cause parse failure.
227         *
228         * @param key The key.
229         * @return The corresponding value. Null if the key has no value defined.
230         */
231        @Override
232        public String getAttribute(String key) {
233
234                return attributeHashMap.get(key);
235        }
236
237        public String getAttributeOld(String key) {
238                int start = 0;
239
240                int end = mAttributes.indexOf(';');
241                while (0 < end) {
242                        //find the first word (up to space) in chunk,
243                        // see if it is this key
244                        int i = mAttributes.indexOf(' ', start);
245                        if (0 < i && i < end) {
246                                if (mAttributes.substring(start, i).equals(key)) {
247                                        //remove quotes, if needed
248                                        if (mAttributes.charAt(i + 1) == '\"' && mAttributes.charAt(end - 1) == '\"') {
249                                                return mAttributes.substring(i + 2, end - 1);//return attribute
250                                        } else {
251                                                return mAttributes.substring(i + 1, end);       //return attribute
252                                        }
253                                }
254                        }
255                        start = end + 2;        //skip required semicolon and single space
256                        end = mAttributes.indexOf(';', start);
257                }
258
259                return null;
260        }
261
262        @Override
263        public boolean hasAttribute(String key) {
264                return attributeHashMap.containsKey(key);
265        }
266
267        @Override
268        public boolean hasAttribute(String key, String value) {
269                String data = getAttribute(key);
270                if(data == null)
271                        return false;
272                if(data.equals(value))
273                        return true;
274                else
275                        return false;
276        }
277
278        /**
279         * Get the first item (everything before first semicolon, if it has one)
280         * in the attribute field, which is assumed to
281         * be a group identifer. This is appropriate for GFF1 files and variants. It is not
282         * appropriate for GTF and GFF2 files, although they may use a named attribute key,
283         * such as "gene_id" or "transcript_id", for grouping.
284         *
285         * @return The group id. Everything before the first semicolon in the attributes string (minus trailing whitespace).
286         */
287        @Override
288        public String group() {
289                int i = mAttributes.indexOf(';');
290                return (i < 0) ? mAttributes.trim() : mAttributes.substring(0, i).trim();
291        }
292
293        /**
294         *
295         */
296        @Override
297        public String toString() {
298                String s = mSeqname + '\t';
299                s += mSource + '\t';
300                s += mType + '\t';
301                s += mLocation.start() + "\t";
302                s += mLocation.end() + "\t";
303                s += Double.toString(mScore) + "\t";
304
305                if (mFrame == -1) {
306                        s += ".\t";
307                } else {
308                        s += mFrame + "\t";
309                }
310
311                s += mAttributes;
312
313                return s;
314        }
315
316        /**
317         * @deprecated
318         */
319        @Deprecated
320        public static void main(String args[])
321                        throws Exception {
322                //Feature f= new Feature();
323                //intentionally perverse
324                //f.group= "gene_id transcript; transcript \"gene_id fantom2\"; ";
325                //      f.addAttribute( "author", "julian" );
326                //      f.addAttribute( "curator", "nick" );
327                //      f.addAttribute( "author", "hanno" );
328                //Log.log( f.group );
329                //f.addAttribute( "perverse", "foo;goo" );
330                //assert f.getAttribute( "perverse").equals( "foo;goo" );
331                //      assert f.getAttribute( "gene_id" ).equals( "transcript" );
332                //      assert f.getAttribute( "author" ).equals( "julian hanno" );
333                //      assert f.getAttribute( "curator" ).equals( "nick" );
334                //      assert f.getAttribute( "transcript").equals( "gene_id fantom2" );
335                //Log.log( "passed test." );
336        }
337
338        @Override
339        public HashMap<String, String> getAttributes() {
340
341                return attributeHashMap;
342        }
343}