001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.distributed;
023
024import java.util.Collections;
025import java.util.HashMap;
026import java.util.HashSet;
027import java.util.Iterator;
028import java.util.Map;
029import java.util.Set;
030
031import org.biojava.bio.Annotation;
032import org.biojava.bio.BioError;
033import org.biojava.bio.BioException;
034import org.biojava.bio.program.gff.GFFEntrySet;
035import org.biojava.bio.program.gff.GFFRecord;
036import org.biojava.bio.seq.DNATools;
037import org.biojava.bio.seq.FeatureFilter;
038import org.biojava.bio.seq.FeatureHolder;
039import org.biojava.bio.seq.MergeFeatureHolder;
040import org.biojava.bio.seq.Sequence;
041import org.biojava.bio.seq.impl.SimpleSequence;
042import org.biojava.bio.symbol.DummySymbolList;
043import org.biojava.bio.symbol.SymbolList;
044import org.biojava.utils.ChangeVetoException;
045
046/**
047 * Use a GFFEntrySet as a DataSource for adding annotation to sequences.
048 *
049 * Instantiate this and add it to an instance of DistributeSequenceDB. All
050 * of the GFF features that have sequence fields matching sequence IDs in the
051 * db will be merged in.
052 * 
053 * @author Thomas Down
054 * @author Matthew Pocock
055
056 */
057public class GFFDataSource implements DistDataSource {
058    private GFFEntrySet gffe;
059    private Set ids;
060    private Map id2seq;
061    private MergeFeatureHolder delegateFH;
062
063    public GFFDataSource(GFFEntrySet gffe) {
064        this.gffe = gffe;
065        this.id2seq = new HashMap();
066        delegateFH = new MergeFeatureHolder();
067    }
068
069    public boolean hasSequence(String id) throws BioException {
070        return false;
071    }
072
073    public boolean hasFeatures(String id) throws BioException {
074        return ids(false).contains(id);
075    }
076
077    public FeatureHolder getFeatures(FeatureFilter ff) throws BioException {
078        return getDelegateFH(true).filter(ff);
079    }
080
081    public FeatureHolder getFeatures(String id, FeatureFilter ff, boolean recurse) throws BioException {
082        if (! hasFeatures(id)) {
083            return FeatureHolder.EMPTY_FEATURE_HOLDER;
084        }
085        
086        Sequence seq = populateDelegateFH(id);
087        return seq.filter(ff, recurse);
088    }
089
090    private Sequence populateDelegateFH(String id) {
091      Sequence seq = (Sequence) id2seq.get(id);
092
093      if(seq == null) {
094        SymbolList dummy = new DummySymbolList(DNATools.getDNA(), 1000000000);
095        seq = new SimpleSequence(dummy, id, id, Annotation.EMPTY_ANNOTATION);
096
097        try {
098          seq = gffe.getAnnotator().annotate(seq);
099          delegateFH.addFeatureHolder(seq);
100          id2seq.put(id, seq);
101        } catch (ChangeVetoException cve) {
102          throw new BioError(cve);
103        } catch (BioException be) {
104          throw new BioError(be);
105        }
106      }
107
108      return seq;
109    }
110
111    private FeatureHolder getDelegateFH(boolean populate)
112    throws BioException {
113      if(populate == true) {
114        for(Iterator i = ids(true).iterator(); i.hasNext(); ) {
115          populateDelegateFH((String) i.next());
116        }
117      }
118
119      return delegateFH;
120    }
121
122    public Sequence getSequence(String id) throws BioException {
123        throw new BioException();
124    }
125
126    public Set ids(boolean topLevel) throws BioException {
127        if (ids == null) {
128            Set _ids = new HashSet();
129
130            for (Iterator i = gffe.lineIterator(); i.hasNext(); ) {
131                Object o = i.next();
132                if (o instanceof GFFRecord) {
133                    GFFRecord rec = (GFFRecord) o;
134                    _ids.add(rec.getSeqName());
135                }
136            }
137
138            ids = Collections.unmodifiableSet(_ids);
139        }
140
141        return ids;
142    }
143}