001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.gff;
023
024import java.util.ArrayList;
025import java.util.Iterator;
026import java.util.List;
027import java.util.Map;
028
029import org.biojava.bio.Annotation;
030import org.biojava.bio.BioException;
031import org.biojava.bio.SmallAnnotation;
032import org.biojava.bio.seq.Feature;
033import org.biojava.bio.seq.FramedFeature;
034import org.biojava.bio.seq.Sequence;
035import org.biojava.bio.seq.SequenceAnnotator;
036import org.biojava.bio.seq.StrandedFeature;
037import org.biojava.bio.symbol.RangeLocation;
038import org.biojava.utils.ChangeVetoException;
039
040/**
041 * A set of entries and comments as a representation of a GFF file.
042 * <p>
043 * This is an intermediate storage solution for GFF stuff. It lets you
044 * collect together an arbitrary set of GFF records and comments, and then
045 * do something with them later.
046 *
047 * @author Matthew Pocock
048 * @author Keith James (docs)
049 * @author Len Trigg
050 */
051public class GFFEntrySet {
052  public static final String PROPERTY_GFF_SCORE = "org.biojava.bio.program.gff.gff_feature_score";
053
054  /**
055   * All of the lines - comments & records
056   */
057  private List lines;
058
059  /**
060   * Make an empty <span class="type">GFFEntrySet</span>.
061   */
062  public GFFEntrySet() {
063    lines = new ArrayList();
064  }
065
066  /**
067   * Loop over all lines in the set.
068   * <p>
069   * The <span class="type">Iterator</span>
070   * will return <span class="type">String</span> and <span class="type">
071   * GFFRecord</span> objects in the order that they were added to this set.
072   * It is your responsibility to check the type of
073   * <span class="method">hasNext()</span> before casting it.
074   */
075  public Iterator lineIterator() {
076    return lines.iterator();
077  }
078
079  /**
080   * Add a comment to the end of this set.
081   * <p>
082   * This should be the text of the comment, without the leading
083   * '<code>#</code>'.
084   *
085   * @param comment a <span class="type">String</span> giving the comment
086   */
087  public void add(String comment) {
088    lines.add(comment);
089  }
090
091  /**
092   * Add a <span class="type">GFFRecord</span> to the end of this set.
093   *
094   * @param record a <span class="type">GFFRecord</span> to append
095   */
096  public void add(GFFRecord record) {
097    lines.add(record);
098  }
099
100  /**
101   * Return how many lines are in this set.
102   *
103   * @return the size
104   */
105  public int size() {
106    return lines.size();
107  }
108
109  /**
110   * Get an annotator that can add GFF features to a
111   * <span class="type">Sequence</span> using the features in this
112   * <span class="type">GFFEntrySet</span>.  The SequenceAnnotator
113   * returned by this method currently adds new features to an
114   * existing sequence (assuming it implements MutableFeatureHolder).
115   *
116   * <p>
117   * Sequences are only annotated if their getName() method returns
118   * a name equal to the sequence name field of one or more records
119   * in this GFFEntrySet.
120   * </p>
121   *
122   * @return an <span class="type">SequenceAnnotator</span> that adds GFF features
123   */
124  public SequenceAnnotator getAnnotator() {
125    return getAnnotator(true);
126  }
127
128  /**
129   * Get an annotator that can add GFF features to a
130   * <span class="type">Sequence</span> using the features in this
131   * <span class="type">GFFEntrySet</span>.  The SequenceAnnotator
132   * returned by this method currently adds new features to an
133   * existing sequence (assuming it implements MutableFeatureHolder).
134   *
135   * <p>
136   * If checkSeqName is set to true,
137   * Sequences are only annotated if their getName() method returns
138   * a name equal to the sequence name field of one or more records
139   * in this GFFEntrySet. If checkSeqName is false, then all features are
140   * added to the sequence regardless of name.
141   * </p>
142   *
143   * @param checkSeqName  boolean to indicate if only records with names
144   *        matching the sequences name should be added
145   * @return an <span class="type">SequenceAnnotator</span> that adds GFF featur
146   es
147   */
148  public SequenceAnnotator getAnnotator(final boolean checkSeqName) {
149    return new SequenceAnnotator() {
150      public Sequence annotate(Sequence seq) throws BioException, ChangeVetoException {
151        Feature.Template plain = new Feature.Template();
152        StrandedFeature.Template stranded = new StrandedFeature.Template();
153        FramedFeature.Template framed = new FramedFeature.Template();
154        plain.annotation = Annotation.EMPTY_ANNOTATION;
155        stranded.annotation = Annotation.EMPTY_ANNOTATION;
156        framed.annotation = Annotation.EMPTY_ANNOTATION;
157        for (Iterator i = lineIterator(); i.hasNext();) {
158          Object o = i.next();
159          if (o instanceof GFFRecord) {
160            GFFRecord rec = (GFFRecord) o;
161            if (!checkSeqName || rec.getSeqName().equals(seq.getName())) {
162              Feature.Template thisTemplate;
163
164              if (rec.getStrand() == StrandedFeature.UNKNOWN) {
165                plain.location = new RangeLocation(rec.getStart(), rec.getEnd());
166                plain.type = rec.getFeature();
167                plain.source = rec.getSource();
168                thisTemplate = plain;
169              } else if (rec.getFrame() == GFFTools.NO_FRAME) {
170                stranded.location = new RangeLocation(rec.getStart(), rec.getEnd());
171                stranded.type = rec.getFeature();
172                stranded.source = rec.getSource();
173                stranded.strand = rec.getStrand();
174                thisTemplate = stranded;
175              } else {
176                framed.location = new RangeLocation(rec.getStart(), rec.getEnd());
177                framed.type = rec.getFeature();
178                framed.source = rec.getSource();
179                framed.strand = rec.getStrand();
180                switch (rec.getFrame()) {
181                  case 0:
182                    framed.readingFrame = FramedFeature.FRAME_0;
183                    break;
184                  case 1:
185                    framed.readingFrame = FramedFeature.FRAME_1;
186                    break;
187                  case 2:
188                    framed.readingFrame = FramedFeature.FRAME_2;
189                    break;
190                }
191                thisTemplate = framed;
192              }
193
194              thisTemplate.annotation = new SmallAnnotation();
195              if (rec.getScore() != GFFTools.NO_SCORE) {
196                thisTemplate.annotation.setProperty(PROPERTY_GFF_SCORE, new Double(rec.getScore()));
197              }
198              Map attributes = rec.getGroupAttributes();
199              Iterator it = attributes.keySet().iterator();
200              while (it.hasNext()) {
201                String key = (String) it.next();
202                thisTemplate.annotation.setProperty(key, attributes.get(key));
203              }
204              seq.createFeature(thisTemplate);
205            }
206          }
207        }
208        return seq;
209      }
210    };
211  }
212
213  /**
214   * Filter this entry set into another set.
215   *
216   * @param filter the <span class="type">GFFRecordFilter</span> to filter with
217   * @return  a new <span class="type">GFFEntrySet</span> containing only the
218   *          items filtered in by the filter
219   */
220  public GFFEntrySet filter(GFFRecordFilter filter) {
221    GFFEntrySet accepted = new GFFEntrySet();
222    for (Iterator i = lineIterator(); i.hasNext();) {
223      Object o = i.next();
224      if (o instanceof GFFRecord) {
225        GFFRecord record = (GFFRecord) o;
226        if (filter.accept(record)) {
227          accepted.add(record);
228        }
229      }
230    }
231
232    return accepted;
233  }
234
235  /**
236   * Get the <span class="type">GFFDocumentHandler</span> for adding to this
237   * set.
238   *
239   * @return  a <span class="type">GFFDocumentHandler</span> that adds everything
240   *          that it recieves to this set
241   */
242  public GFFDocumentHandler getAddHandler() {
243    return new EntrySetBuilder();
244  }
245
246  /**
247   * Write all records in this set out to a handler.
248   *
249   * @param handler  the GFFDocumentHandler to inform of the records
250   */
251  public void streamRecords(GFFDocumentHandler handler) {
252    handler.startDocument("");
253
254    for (Iterator i = lineIterator(); i.hasNext();) {
255      Object line = i.next();
256      if (line instanceof String) {
257        handler.commentLine((String) line);
258      } else {
259        handler.recordLine((GFFRecord) line);
260      }
261    }
262
263    handler.endDocument();
264  }
265
266  /**
267   * The type of object returned by <span class="method">getAddHandler</span>.
268   *
269   * @author Matthew Pocock
270   * @author Thomas Down
271   */
272  private class EntrySetBuilder implements GFFDocumentHandler {
273    public void startDocument(String locator) {
274    }
275
276    public void endDocument() {
277    }
278
279    public void commentLine(String comment) {
280      lines.add(comment);
281    }
282
283    public void recordLine(GFFRecord record) {
284      lines.add(record);
285    }
286  }
287}