/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.program.gff;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.FeatureFilter;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.symbol.Location;

/**
 * Turns a sequence database into a GFF event stream.
 * <p>
 * Features are selected with a configurable
 * <span class="type">FeatureFilter</span>, converted into
 * <span class="type">SimpleGFFRecord</span>s and pushed to a
 * <span class="type">GFFDocumentHandler</span>.
 *
 * @author Matthew Pocock
 * @author Thomas Down
 * @author Len Trigg
 */
public class SequencesAsGFF {
  /**
   * The <span class="type">FeatureFilter</span> for selecting features to
   * report as <span class="type">GFFRecord</span>s.
   */
  private FeatureFilter filter = FeatureFilter.all;

  /**
   * Whether or not to recurse through the features during searching.
   */
  private boolean recurse = false;

  /**
   * Whether or not non-contiguous features should be broken into blocks.
   *
   * @since 1.4
   */
  private boolean shatter = false;

  /**
   * Whether or not a sequence-region comment line is emitted for each
   * sequence before its features.
   *
   * @since 1.4
   */
  private boolean generateSequenceHeader = true;

  /**
   * Specify whether features with non-contiguous locations should be broken
   * up such that a GFF feature line is emitted for each contiguous block.
   *
   * @param b <span class="kw">true</span> to emit one record per block,
   *          <span class="kw">false</span> to emit one record per feature
   * @since 1.4
   */
  public void setShatter(boolean b) {
    this.shatter = b;
  }

  /**
   * Determine if features with non-contiguous locations will be broken into
   * multiple GFF records.
   *
   * @return the current shatter setting
   * @since 1.4
   */
  public boolean getShatter() {
    return shatter;
  }

  /**
   * Specify whether a per-sequence header line, giving the length of the
   * sequence, should be generated.
   *
   * @param b <span class="kw">true</span> to generate header lines,
   *          <span class="kw">false</span> to suppress them
   * @since 1.4
   */
  public void setGenerateSequenceHeader(boolean b) {
    this.generateSequenceHeader = b;
  }

  /**
   * Discover if per-sequence header lines will be generated.
   *
   * @return the current header-generation setting
   * @since 1.4
   */
  public boolean getGenerateSequenceHeader() {
    return generateSequenceHeader;
  }

  /**
   * Return the current <span class="type">FeatureFilter</span>.
   * <p>
   * This is the object that will accept or reject individual features.
   *
   * @return the current <span class="type">FeatureFilter</span>
   */
  public FeatureFilter getFeatureFilter() {
    return filter;
  }

  /**
   * Replace the current <span class="type">FeatureFilter</span> with
   * <span class="arg">filter</span>.
   *
   * @param filter the new <span class="type">FeatureFilter</span>
   */
  public void setFeatureFilter(FeatureFilter filter) {
    this.filter = filter;
  }

  /**
   * Return whether features will be filtered recursively or not.
   *
   * @return whether or not to recurse
   */
  public boolean getRecurse() {
    return recurse;
  }

  /**
   * Set whether features will be filtered recursively to
   * <span class="arg">recurse</span>.
   *
   * @param recurse <span class="kw">true</span> if you want to recurse,
   *                <span class="kw">false</span> otherwise
   */
  public void setRecurse(boolean recurse) {
    this.recurse = recurse;
  }

  /**
   * Emit any per-sequence header information.
   * The default implementation emits sequence-region comment lines, and
   * only does so when header generation is enabled.
   *
   * @param seq     the <span class="type">Sequence</span> whose length is
   *                reported
   * @param handler the <span class="type">GFFDocumentHandler</span> that will
   *                receive the comment line
   * @param id      the sequence name to report in the comment line
   * @throws BioException declared so that overriding implementations may
   *                      report failures
   * @since 1.4
   */
  protected void doPreProcessSequence(Sequence seq,
                                      GFFDocumentHandler handler,
                                      String id)
    throws BioException
  {
    if (generateSequenceHeader) {
      handler.commentLine("#sequence-region " + id + " 1 " + seq.length());
    }
  }

  /**
   * Internal method to process an individual <span class="type">Sequence</span>.
   * <p>
   * The sequence is filtered with the current filter and recurse settings
   * and every resulting feature is handed to
   * <span class="method">doProcessFeature</span>.
   *
   * @param seq     the <span class="type">Sequence</span> to GFFify
   * @param handler the <span class="type">GFFDocumentHandler</span> that will
   *                receive the GFF for all suitable features within
   *                <span class="arg">seq</span>
   * @param id      the value of the <span class="method">seqName</span> field
   *                in any <span class="type">GFFRecord</span>s produced
   * @throws BioException if processing a feature fails
   */
  protected void doProcessSequence(Sequence seq,
                                   GFFDocumentHandler handler,
                                   String id)
    throws BioException
  {
    Iterator fi = seq.filter(getFeatureFilter(), getRecurse()).features();

    while (fi.hasNext()) {
      doProcessFeature((Feature) fi.next(), handler, id);
    }
  }

  /**
   * Internal method to process an individual <span class="type">Feature</span>.
   * <p>
   * When shattering is enabled and the feature's location is not contiguous,
   * one record is emitted per block of the location; otherwise a single
   * record spanning the whole location is emitted.
   *
   * @param feature the <span class="type">Feature</span> to GFFify
   * @param handler the <span class="type">GFFDocumentHandler</span> that will
   *                receive the GFF for this feature
   * @param id      the value of the <span class="method">seqName</span> field
   *                in any <span class="type">GFFRecord</span>s produced
   * @throws BioException if a record could not be created
   */
  protected void doProcessFeature(Feature feature,
                                  GFFDocumentHandler handler,
                                  String id)
    throws BioException
  {
    Location loc = feature.getLocation();
    if (shatter && !loc.isContiguous()) {
      for (Iterator si = loc.blockIterator(); si.hasNext(); ) {
        Location shatterBloc = (Location) si.next();
        // Build a fresh record for every block. Re-using one mutable record
        // would leave a handler that retains the records it receives holding
        // several references to a single object, all showing the coordinates
        // of the last block.
        SimpleGFFRecord record = createGFFRecord(feature, id);
        record.setStart(shatterBloc.getMin());
        record.setEnd(shatterBloc.getMax());
        handler.recordLine(record);
      }
    } else {
      handler.recordLine(createGFFRecord(feature, id));
    }
  }

  /**
   * Internal method to create a <span class="type">GFFRecord</span>
   * from an individual <span class="type">Feature</span>.
   * <p>
   * The record spans the full extent of the feature's location. Score and
   * frame are set to <span class="kw">GFFTools.NO_SCORE</span> and
   * <span class="kw">GFFTools.NO_FRAME</span>. The strand is unknown unless
   * the feature is a <span class="type">StrandedFeature</span> on the
   * positive or negative strand. The feature's annotation becomes the group
   * attributes: each key is converted to a string and mapped to a list of
   * the string forms of its value(s).
   *
   * @param feature the <span class="type">Feature</span> to GFFify
   * @param id      the value of the <span class="method">seqName</span> field
   *                in any <span class="type">GFFRecord</span>s produced
   * @return a newly created record describing
   *         <span class="arg">feature</span>
   * @throws BioException declared so that overriding implementations may
   *                      report failures
   */
  protected SimpleGFFRecord createGFFRecord(Feature feature,
                                            String id)
    throws BioException
  {
    SimpleGFFRecord record = new SimpleGFFRecord();
    record.setSeqName(id);
    record.setSource(feature.getSource());
    record.setFeature(feature.getType());
    Location loc = feature.getLocation();
    record.setStart(loc.getMin());
    record.setEnd(loc.getMax());
    record.setScore(GFFTools.NO_SCORE);
    record.setStrand(StrandedFeature.UNKNOWN);
    if (feature instanceof StrandedFeature) {
      StrandedFeature sf = (StrandedFeature) feature;
      if (sf.getStrand() == StrandedFeature.POSITIVE) {
        record.setStrand(StrandedFeature.POSITIVE);
      } else if (sf.getStrand() == StrandedFeature.NEGATIVE) {
        record.setStrand(StrandedFeature.NEGATIVE);
      }
    }
    record.setFrame(GFFTools.NO_FRAME);

    Map fMap = feature.getAnnotation().asMap();
    Map fMap2 = new HashMap();
    for (Iterator ei = fMap.entrySet().iterator(); ei.hasNext(); ) {
      Map.Entry entry = (Map.Entry) ei.next();
      String keyS = entry.getKey().toString();
      Object value = entry.getValue();
      // The list must be a mutable copy: its elements are replaced in place
      // below, and the annotation's own collection must not be modified.
      List valueList;
      if (value instanceof Collection) {
        valueList = new ArrayList((Collection) value);
      } else {
        valueList = new ArrayList();
        valueList.add(value);
      }
      for (int i = 0; i < valueList.size(); i++) {
        Object o = valueList.get(i);
        valueList.set(i, o.toString());
      }
      fMap2.put(keyS, valueList);
    }
    record.setGroupAttributes(fMap2);
    record.setComment(null);

    return record;
  }

  /**
   * Process an individual <span class="type">Sequence</span>, informing
   * <span class="arg">handler</span> of any suitable features.
   * <p>
   * The document is started with the sequence's URN, and the sequence's
   * name is used as the <span class="method">seqName</span> of the records.
   *
   * @param seq     the <span class="type">Sequence</span> to GFFify
   * @param handler the <span class="type">GFFDocumentHandler</span> that will
   *                receive the GFF for all suitable features within
   *                <span class="arg">seq</span>
   * @throws BioException if processing the sequence fails
   */
  public void processSequence(Sequence seq, GFFDocumentHandler handler)
    throws BioException
  {
    handler.startDocument(seq.getURN());
    doPreProcessSequence(seq, handler, seq.getName());
    doProcessSequence(seq, handler, seq.getName());
    handler.endDocument();
  }

  /**
   * Process all <span class="type">Sequence</span>s within a
   * <span class="type">SequenceDB</span>, informing
   * <span class="arg">handler</span> of any suitable features.
   * <p>
   * Two passes are made over the database ids: the first emits the header
   * information for every sequence, the second emits the features, so all
   * headers reach the handler before any feature record.
   *
   * @param seqDB   the <span class="type">SequenceDB</span> to GFFify
   * @param handler the <span class="type">GFFDocumentHandler</span> that will
   *                receive the GFF for all suitable features within
   *                <span class="arg">seqDB</span>
   * @throws BioException if a sequence could not be retrieved or processed
   */
  public void processDB(SequenceDB seqDB, GFFDocumentHandler handler)
    throws BioException
  {
    handler.startDocument("unknown:SequenceDB:" + seqDB.getName());
    for (Iterator i = seqDB.ids().iterator(); i.hasNext(); ) {
      String id = (String) i.next();
      Sequence seq = seqDB.getSequence(id);
      doPreProcessSequence(seq, handler, id);
    }
    for (Iterator i = seqDB.ids().iterator(); i.hasNext(); ) {
      String id = (String) i.next();
      Sequence seq = seqDB.getSequence(id);
      doProcessSequence(seq, handler, id);
    }
    handler.endDocument();
  }
}