001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.distributed; 023 024import java.util.Collections; 025import java.util.HashMap; 026import java.util.HashSet; 027import java.util.Iterator; 028import java.util.Map; 029import java.util.Set; 030 031import org.biojava.bio.Annotation; 032import org.biojava.bio.BioError; 033import org.biojava.bio.BioException; 034import org.biojava.bio.program.gff.GFFEntrySet; 035import org.biojava.bio.program.gff.GFFRecord; 036import org.biojava.bio.seq.DNATools; 037import org.biojava.bio.seq.FeatureFilter; 038import org.biojava.bio.seq.FeatureHolder; 039import org.biojava.bio.seq.MergeFeatureHolder; 040import org.biojava.bio.seq.Sequence; 041import org.biojava.bio.seq.impl.SimpleSequence; 042import org.biojava.bio.symbol.DummySymbolList; 043import org.biojava.bio.symbol.SymbolList; 044import org.biojava.utils.ChangeVetoException; 045 046/** 047 * Use a GFFEntrySet as a DataSource for adding annotation to sequences. 048 * 049 * Instantiate this and add it to an instance of DistributeSequenceDB. All 050 * of the GFF features that have sequence fields matching sequence IDs in the 051 * db will be merged in. 052 * 053 * @author Thomas Down 054 * @author Matthew Pocock 055 056 */ 057public class GFFDataSource implements DistDataSource { 058 private GFFEntrySet gffe; 059 private Set ids; 060 private Map id2seq; 061 private MergeFeatureHolder delegateFH; 062 063 public GFFDataSource(GFFEntrySet gffe) { 064 this.gffe = gffe; 065 this.id2seq = new HashMap(); 066 delegateFH = new MergeFeatureHolder(); 067 } 068 069 public boolean hasSequence(String id) throws BioException { 070 return false; 071 } 072 073 public boolean hasFeatures(String id) throws BioException { 074 return ids(false).contains(id); 075 } 076 077 public FeatureHolder getFeatures(FeatureFilter ff) throws BioException { 078 return getDelegateFH(true).filter(ff); 079 } 080 081 public FeatureHolder getFeatures(String id, FeatureFilter ff, boolean recurse) throws BioException { 082 if (! hasFeatures(id)) { 083 return FeatureHolder.EMPTY_FEATURE_HOLDER; 084 } 085 086 Sequence seq = populateDelegateFH(id); 087 return seq.filter(ff, recurse); 088 } 089 090 private Sequence populateDelegateFH(String id) { 091 Sequence seq = (Sequence) id2seq.get(id); 092 093 if(seq == null) { 094 SymbolList dummy = new DummySymbolList(DNATools.getDNA(), 1000000000); 095 seq = new SimpleSequence(dummy, id, id, Annotation.EMPTY_ANNOTATION); 096 097 try { 098 seq = gffe.getAnnotator().annotate(seq); 099 delegateFH.addFeatureHolder(seq); 100 id2seq.put(id, seq); 101 } catch (ChangeVetoException cve) { 102 throw new BioError(cve); 103 } catch (BioException be) { 104 throw new BioError(be); 105 } 106 } 107 108 return seq; 109 } 110 111 private FeatureHolder getDelegateFH(boolean populate) 112 throws BioException { 113 if(populate == true) { 114 for(Iterator i = ids(true).iterator(); i.hasNext(); ) { 115 populateDelegateFH((String) i.next()); 116 } 117 } 118 119 return delegateFH; 120 } 121 122 public Sequence getSequence(String id) throws BioException { 123 throw new BioException(); 124 } 125 126 public Set ids(boolean topLevel) throws BioException { 127 if (ids == null) { 128 Set _ids = new HashSet(); 129 130 for (Iterator i = gffe.lineIterator(); i.hasNext(); ) { 131 Object o = i.next(); 132 if (o instanceof GFFRecord) { 133 GFFRecord rec = (GFFRecord) o; 134 _ids.add(rec.getSeqName()); 135 } 136 } 137 138 ids = Collections.unmodifiableSet(_ids); 139 } 140 141 return ids; 142 } 143}