001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.db.flat; 023 024import java.io.ByteArrayInputStream; 025import java.io.File; 026import java.io.IOException; 027import java.io.InputStream; 028import java.util.NoSuchElementException; 029 030import org.biojava.bio.Annotation; 031import org.biojava.bio.BioException; 032import org.biojava.bio.program.indexdb.BioStore; 033import org.biojava.bio.program.indexdb.Record; 034import org.biojava.bio.seq.Sequence; 035import org.biojava.bio.seq.SequenceIterator; 036import org.biojava.bio.seq.db.IllegalIDException; 037import org.biojava.bio.seq.db.SequenceDBLite; 038import org.biojava.bio.seq.io.SeqIOTools; 039import org.biojava.bio.seq.io.SequenceBuilderFactory; 040import org.biojava.bio.seq.io.SequenceFormat; 041import org.biojava.bio.seq.io.StreamReader; 042import org.biojava.bio.seq.io.SymbolTokenization; 043import org.biojava.bio.symbol.Alphabet; 044import org.biojava.utils.ChangeVetoException; 045import org.biojava.utils.Unchangeable; 046import org.biojava.utils.io.RAF; 047import org.biojava.utils.lsid.LifeScienceIdentifier; 048import org.biojava.utils.lsid.LifeScienceIdentifierParseException; 049 050/** 051 * <code>FlatSequenceDB</code> is an OBDA flatfile sequence databank 052 * implementation. It is backed by an index created using the 053 * <code>org.biojava.bio.program.indexdb</code> package. 054 * 055 * @author Keith James 056 */ 057public class FlatSequenceDB extends Unchangeable implements SequenceDBLite 058{ 059 private BioStore index; 060 private String dbName; 061 private LifeScienceIdentifier format; 062 063 public FlatSequenceDB(String location, String dbName) 064 throws IOException, BioException 065 { 066 this.dbName = dbName; 067 index = new BioStore(new File(location), false); 068 069 try 070 { 071 Annotation config = index.getMetaData(); 072 String lsid = (String) config.getProperty("format"); 073 format = LifeScienceIdentifier.valueOf(lsid); 074 } 075 catch (NoSuchElementException nsee) 076 { 077 throw new BioException("Malformed OBDA index '" 078 + location 079 + "' does not indicate sequence format",nsee); 080 } 081 catch (LifeScienceIdentifierParseException lse) 082 { 083 throw new BioException("Malformed OBDA index '" 084 + location 085 + "' has a format identifier which is not a valid LSID",lse); 086 } 087 } 088 089 public String getName() 090 { 091 return dbName; 092 } 093 094 public Sequence getSequence(String id) 095 throws IllegalIDException, BioException 096 { 097 try 098 { 099 Record record = index.get(id); 100 RAF seqRAF = record.getFile(); 101 int recLength = record.getLength(); 102 seqRAF.seek(record.getOffset()); 103 104 byte [] bytes = new byte [recLength]; 105 seqRAF.readFully(bytes, 0, recLength); 106 InputStream is = new ByteArrayInputStream(bytes); 107 108 int formatId = SeqIOTools.identifyFormat(format.getNamespaceId(), 109 format.getObjectId()); 110 111 SequenceFormat sf = SeqIOTools.getSequenceFormat(formatId); 112 Alphabet alpha = SeqIOTools.getAlphabet(formatId); 113 SymbolTokenization toke = alpha.getTokenization("token"); 114 SequenceBuilderFactory sbf = SeqIOTools.getBuilderFactory(formatId); 115 116 SequenceIterator si = new StreamReader(is, sf, toke, sbf); 117 return si.nextSequence(); 118 } 119 catch (NoSuchElementException nsee) 120 { 121 throw new IllegalIDException("Failed to find sequence with ID " 122 + id 123 + " in database " 124 + getName()); 125 } 126 catch (IOException ioe) 127 { 128 throw new BioException("Failed to retrieve sequence with ID " 129 + id, ioe); 130 } 131 } 132 133 /** 134 * <code>addSequence</code> always throws a 135 * <code>ChangeVetoException</code> as this implementation is 136 * immutable. 137 * 138 * @param sequence a <code>Sequence</code>. 139 * 140 * @exception ChangeVetoException 141 */ 142 public void addSequence(Sequence sequence) throws ChangeVetoException 143 { 144 throw new ChangeVetoException("Failed to add sequence." 145 + " Sequences may not be added" 146 + " to a flat database"); 147 } 148 149 /** 150 * <code>removeSequence</code> always throws a 151 * <code>ChangeVetoException</code> as this implementation is 152 * immutable. 153 * 154 * @param id a <code>String</code>. 155 * 156 * @exception ChangeVetoException 157 */ 158 public void removeSequence(String id) throws ChangeVetoException 159 { 160 throw new ChangeVetoException("Failed to add sequence." 161 + " Sequences may not be removed" 162 + " from a flat database"); 163 } 164}