001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.db.flat;
023
024import java.io.ByteArrayInputStream;
025import java.io.File;
026import java.io.IOException;
027import java.io.InputStream;
028import java.util.NoSuchElementException;
029
030import org.biojava.bio.Annotation;
031import org.biojava.bio.BioException;
032import org.biojava.bio.program.indexdb.BioStore;
033import org.biojava.bio.program.indexdb.Record;
034import org.biojava.bio.seq.Sequence;
035import org.biojava.bio.seq.SequenceIterator;
036import org.biojava.bio.seq.db.IllegalIDException;
037import org.biojava.bio.seq.db.SequenceDBLite;
038import org.biojava.bio.seq.io.SeqIOTools;
039import org.biojava.bio.seq.io.SequenceBuilderFactory;
040import org.biojava.bio.seq.io.SequenceFormat;
041import org.biojava.bio.seq.io.StreamReader;
042import org.biojava.bio.seq.io.SymbolTokenization;
043import org.biojava.bio.symbol.Alphabet;
044import org.biojava.utils.ChangeVetoException;
045import org.biojava.utils.Unchangeable;
046import org.biojava.utils.io.RAF;
047import org.biojava.utils.lsid.LifeScienceIdentifier;
048import org.biojava.utils.lsid.LifeScienceIdentifierParseException;
049
050/**
051 * <code>FlatSequenceDB</code> is an OBDA flatfile sequence databank
052 * implementation. It is backed by an index created using the
053 * <code>org.biojava.bio.program.indexdb</code> package.
054 *
055 * @author Keith James
056 */
057public class FlatSequenceDB extends Unchangeable implements SequenceDBLite
058{
059    private BioStore index;
060    private String dbName;
061    private LifeScienceIdentifier format;
062
063    public FlatSequenceDB(String location, String dbName)
064        throws IOException, BioException
065    {
066        this.dbName = dbName;
067        index = new BioStore(new File(location), false);
068
069        try
070        {
071            Annotation config = index.getMetaData();
072            String lsid = (String) config.getProperty("format");
073            format = LifeScienceIdentifier.valueOf(lsid);
074        }
075        catch (NoSuchElementException nsee)
076        {
077            throw new BioException("Malformed OBDA index '"
078                                   + location
079                                   + "' does not indicate sequence format",nsee);
080        }
081        catch (LifeScienceIdentifierParseException lse)
082        {
083            throw new BioException("Malformed OBDA index '"
084                                   + location
085                                   + "' has a format identifier which is not a valid LSID",lse);
086        }
087    }
088
089    public String getName()
090    {
091        return dbName;
092    }
093
094    public Sequence getSequence(String id)
095        throws IllegalIDException, BioException
096    {
097        try
098        {
099            Record record = index.get(id);
100            RAF seqRAF = record.getFile();
101            int recLength = record.getLength();
102            seqRAF.seek(record.getOffset());
103
104            byte [] bytes = new byte [recLength];
105            seqRAF.readFully(bytes, 0, recLength);
106            InputStream is = new ByteArrayInputStream(bytes);
107
108            int formatId = SeqIOTools.identifyFormat(format.getNamespaceId(),
109                                                     format.getObjectId());
110
111            SequenceFormat sf = SeqIOTools.getSequenceFormat(formatId);
112            Alphabet alpha = SeqIOTools.getAlphabet(formatId);
113            SymbolTokenization toke = alpha.getTokenization("token");
114            SequenceBuilderFactory sbf = SeqIOTools.getBuilderFactory(formatId);
115
116            SequenceIterator si = new StreamReader(is, sf, toke, sbf);
117            return si.nextSequence();
118        }
119        catch (NoSuchElementException nsee)
120        {
121            throw new IllegalIDException("Failed to find sequence with ID "
122                                         + id
123                                         + " in database "
124                                         + getName());
125        }
126        catch (IOException ioe)
127        {
128            throw new BioException("Failed to retrieve sequence with ID "
129                                   + id, ioe);
130        }
131    }
132
133    /**
134     * <code>addSequence</code> always throws a
135     * <code>ChangeVetoException</code> as this implementation is
136     * immutable.
137     *
138     * @param sequence a <code>Sequence</code>.
139     *
140     * @exception ChangeVetoException
141     */
142    public void addSequence(Sequence sequence) throws ChangeVetoException
143    {
144        throw new ChangeVetoException("Failed to add sequence."
145                                      + " Sequences may not be added"
146                                      + " to a flat database");
147    }
148
149    /**
150     * <code>removeSequence</code> always throws a
151     * <code>ChangeVetoException</code> as this implementation is
152     * immutable.
153     *
154     * @param id a <code>String</code>.
155     *
156     * @exception ChangeVetoException
157     */
158    public void removeSequence(String id) throws ChangeVetoException
159    {
160        throw new ChangeVetoException("Failed to add sequence."
161                                      + " Sequences may not be removed"
162                                      + " from a flat database");
163    }
164}