001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.program.gff; 023 024import java.util.ArrayList; 025import java.util.Iterator; 026import java.util.List; 027import java.util.Map; 028 029import org.biojava.bio.Annotation; 030import org.biojava.bio.BioException; 031import org.biojava.bio.SmallAnnotation; 032import org.biojava.bio.seq.Feature; 033import org.biojava.bio.seq.FramedFeature; 034import org.biojava.bio.seq.Sequence; 035import org.biojava.bio.seq.SequenceAnnotator; 036import org.biojava.bio.seq.StrandedFeature; 037import org.biojava.bio.symbol.RangeLocation; 038import org.biojava.utils.ChangeVetoException; 039 040/** 041 * A set of entries and comments as a representation of a GFF file. 042 * <p> 043 * This is an intermediate storage solution for GFF stuff. It lets you 044 * collect together an arbitrary set of GFF records and comments, and then 045 * do something with them later. 046 * 047 * @author Matthew Pocock 048 * @author Keith James (docs) 049 * @author Len Trigg 050 */ 051public class GFFEntrySet { 052 public static final String PROPERTY_GFF_SCORE = "org.biojava.bio.program.gff.gff_feature_score"; 053 054 /** 055 * All of the lines - comments & records 056 */ 057 private List lines; 058 059 /** 060 * Make an empty <span class="type">GFFEntrySet</span>. 061 */ 062 public GFFEntrySet() { 063 lines = new ArrayList(); 064 } 065 066 /** 067 * Loop over all lines in the set. 068 * <p> 069 * The <span class="type">Iterator</span> 070 * will return <span class="type">String</span> and <span class="type"> 071 * GFFRecord</span> objects in the order that they were added to this set. 072 * It is your responsibility to check the type of 073 * <span class="method">hasNext()</span> before casting it. 074 */ 075 public Iterator lineIterator() { 076 return lines.iterator(); 077 } 078 079 /** 080 * Add a comment to the end of this set. 081 * <p> 082 * This should be the text of the comment, without the leading 083 * '<code>#</code>'. 084 * 085 * @param comment a <span class="type">String</span> giving the comment 086 */ 087 public void add(String comment) { 088 lines.add(comment); 089 } 090 091 /** 092 * Add a <span class="type">GFFRecord</span> to the end of this set. 093 * 094 * @param record a <span class="type">GFFRecord</span> to append 095 */ 096 public void add(GFFRecord record) { 097 lines.add(record); 098 } 099 100 /** 101 * Return how many lines are in this set. 102 * 103 * @return the size 104 */ 105 public int size() { 106 return lines.size(); 107 } 108 109 /** 110 * Get an annotator that can add GFF features to a 111 * <span class="type">Sequence</span> using the features in this 112 * <span class="type">GFFEntrySet</span>. The SequenceAnnotator 113 * returned by this method currently adds new features to an 114 * existing sequence (assuming it implements MutableFeatureHolder). 115 * 116 * <p> 117 * Sequences are only annotated if their getName() method returns 118 * a name equal to the sequence name field of one or more records 119 * in this GFFEntrySet. 120 * </p> 121 * 122 * @return an <span class="type">SequenceAnnotator</span> that adds GFF features 123 */ 124 public SequenceAnnotator getAnnotator() { 125 return getAnnotator(true); 126 } 127 128 /** 129 * Get an annotator that can add GFF features to a 130 * <span class="type">Sequence</span> using the features in this 131 * <span class="type">GFFEntrySet</span>. The SequenceAnnotator 132 * returned by this method currently adds new features to an 133 * existing sequence (assuming it implements MutableFeatureHolder). 134 * 135 * <p> 136 * If checkSeqName is set to true, 137 * Sequences are only annotated if their getName() method returns 138 * a name equal to the sequence name field of one or more records 139 * in this GFFEntrySet. If checkSeqName is false, then all features are 140 * added to the sequence regardless of name. 141 * </p> 142 * 143 * @param checkSeqName boolean to indicate if only records with names 144 * matching the sequences name should be added 145 * @return an <span class="type">SequenceAnnotator</span> that adds GFF featur 146 es 147 */ 148 public SequenceAnnotator getAnnotator(final boolean checkSeqName) { 149 return new SequenceAnnotator() { 150 public Sequence annotate(Sequence seq) throws BioException, ChangeVetoException { 151 Feature.Template plain = new Feature.Template(); 152 StrandedFeature.Template stranded = new StrandedFeature.Template(); 153 FramedFeature.Template framed = new FramedFeature.Template(); 154 plain.annotation = Annotation.EMPTY_ANNOTATION; 155 stranded.annotation = Annotation.EMPTY_ANNOTATION; 156 framed.annotation = Annotation.EMPTY_ANNOTATION; 157 for (Iterator i = lineIterator(); i.hasNext();) { 158 Object o = i.next(); 159 if (o instanceof GFFRecord) { 160 GFFRecord rec = (GFFRecord) o; 161 if (!checkSeqName || rec.getSeqName().equals(seq.getName())) { 162 Feature.Template thisTemplate; 163 164 if (rec.getStrand() == StrandedFeature.UNKNOWN) { 165 plain.location = new RangeLocation(rec.getStart(), rec.getEnd()); 166 plain.type = rec.getFeature(); 167 plain.source = rec.getSource(); 168 thisTemplate = plain; 169 } else if (rec.getFrame() == GFFTools.NO_FRAME) { 170 stranded.location = new RangeLocation(rec.getStart(), rec.getEnd()); 171 stranded.type = rec.getFeature(); 172 stranded.source = rec.getSource(); 173 stranded.strand = rec.getStrand(); 174 thisTemplate = stranded; 175 } else { 176 framed.location = new RangeLocation(rec.getStart(), rec.getEnd()); 177 framed.type = rec.getFeature(); 178 framed.source = rec.getSource(); 179 framed.strand = rec.getStrand(); 180 switch (rec.getFrame()) { 181 case 0: 182 framed.readingFrame = FramedFeature.FRAME_0; 183 break; 184 case 1: 185 framed.readingFrame = FramedFeature.FRAME_1; 186 break; 187 case 2: 188 framed.readingFrame = FramedFeature.FRAME_2; 189 break; 190 } 191 thisTemplate = framed; 192 } 193 194 thisTemplate.annotation = new SmallAnnotation(); 195 if (rec.getScore() != GFFTools.NO_SCORE) { 196 thisTemplate.annotation.setProperty(PROPERTY_GFF_SCORE, new Double(rec.getScore())); 197 } 198 Map attributes = rec.getGroupAttributes(); 199 Iterator it = attributes.keySet().iterator(); 200 while (it.hasNext()) { 201 String key = (String) it.next(); 202 thisTemplate.annotation.setProperty(key, attributes.get(key)); 203 } 204 seq.createFeature(thisTemplate); 205 } 206 } 207 } 208 return seq; 209 } 210 }; 211 } 212 213 /** 214 * Filter this entry set into another set. 215 * 216 * @param filter the <span class="type">GFFRecordFilter</span> to filter with 217 * @return a new <span class="type">GFFEntrySet</span> containing only the 218 * items filtered in by the filter 219 */ 220 public GFFEntrySet filter(GFFRecordFilter filter) { 221 GFFEntrySet accepted = new GFFEntrySet(); 222 for (Iterator i = lineIterator(); i.hasNext();) { 223 Object o = i.next(); 224 if (o instanceof GFFRecord) { 225 GFFRecord record = (GFFRecord) o; 226 if (filter.accept(record)) { 227 accepted.add(record); 228 } 229 } 230 } 231 232 return accepted; 233 } 234 235 /** 236 * Get the <span class="type">GFFDocumentHandler</span> for adding to this 237 * set. 238 * 239 * @return a <span class="type">GFFDocumentHandler</span> that adds everything 240 * that it recieves to this set 241 */ 242 public GFFDocumentHandler getAddHandler() { 243 return new EntrySetBuilder(); 244 } 245 246 /** 247 * Write all records in this set out to a handler. 248 * 249 * @param handler the GFFDocumentHandler to inform of the records 250 */ 251 public void streamRecords(GFFDocumentHandler handler) { 252 handler.startDocument(""); 253 254 for (Iterator i = lineIterator(); i.hasNext();) { 255 Object line = i.next(); 256 if (line instanceof String) { 257 handler.commentLine((String) line); 258 } else { 259 handler.recordLine((GFFRecord) line); 260 } 261 } 262 263 handler.endDocument(); 264 } 265 266 /** 267 * The type of object returned by <span class="method">getAddHandler</span>. 268 * 269 * @author Matthew Pocock 270 * @author Thomas Down 271 */ 272 private class EntrySetBuilder implements GFFDocumentHandler { 273 public void startDocument(String locator) { 274 } 275 276 public void endDocument() { 277 } 278 279 public void commentLine(String comment) { 280 lines.add(comment); 281 } 282 283 public void recordLine(GFFRecord record) { 284 lines.add(record); 285 } 286 } 287}