001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.db;
023
024import java.io.BufferedReader;
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.FileOutputStream;
028import java.io.FileReader;
029import java.io.FileWriter;
030import java.io.IOException;
031import java.io.ObjectInputStream;
032import java.io.ObjectOutputStream;
033import java.io.PrintWriter;
034import java.io.Serializable;
035import java.util.Collections;
036import java.util.HashMap;
037import java.util.HashSet;
038import java.util.Iterator;
039import java.util.Map;
040import java.util.Set;
041import java.util.StringTokenizer;
042
043import org.biojava.bio.BioException;
044import org.biojava.bio.seq.io.SequenceBuilderFactory;
045import org.biojava.bio.seq.io.SequenceFormat;
046import org.biojava.bio.seq.io.SymbolTokenization;
047import org.biojava.utils.AssertionFailure;
048import org.biojava.utils.OverlayMap;
049
050/**
051 * <p>
052 * Implements IndexStore as a serialized file for the java data and a
053 * tab-delimited file of offsets.
054 * </p>
055 *
056 * <p>
057 * Use the constructor to create a new index store. Use the static factory
058 * method open() to load an existing store.
059 * </p>
060 *
061 * The tab-delimited file looks like:
062 * <pre>
063 * fileNumber \t offset \t id \n
064 * </pre>
065 *
066 * @author Matthew Pocock
067 * @author Thomas Down
068 * @author Keith James
069 * @author David Huen
070 */
071public class TabIndexStore implements IndexStore, Serializable {
072  /**
073   * Open an existing index store.
074   *
075   * @param storeFile  the File encapsulating the store
076   * @return a new TabIndexStore for that file
077   * @throws IOException if the storeFile could not be processed
078   */
079  public static TabIndexStore open(File storeFile)
080  throws IOException {
081    try {
082      FileInputStream fis = new FileInputStream(storeFile);
083      ObjectInputStream p = new ObjectInputStream(fis);
084      TabIndexStore indxStore = (TabIndexStore) p.readObject();
085      fis.close();
086      return indxStore;
087    } catch (ClassNotFoundException cnfe) {
088      throw new AssertionFailure("Assertion Failure: How did we get here?", cnfe);
089    }
090  }
091
092
093  // internal book-keeping for indices
094  private transient Map idToIndex;
095  private transient Map commited;
096  private transient Map uncommited;
097
098  // the two files for storing the store info and the actual table of indices
099  private final File storeFile;
100  private final File indexFile;
101
102  private final String name;
103
104  private final Set files;
105  private File[] seqFileIndex;
106
107  private final SequenceFormat format;
108  private final SequenceBuilderFactory sbFactory;
109  private final SymbolTokenization symbolParser;
110
111  /**
112   * Create a new TabIndexStore.
113   *
114   * <p>
115   * The store file and index file must not exist. This is to prevent you from
116   * accidentally destroying an existing index.
117   * </p>
118   *
119   * @param storeFile     the file that will be used to persist this index store
120   * @param indexFile     the file that will hold the actual indecies
121   * @param name          the name that will be used by the database backed by
122   *                  this index
123   * @param format        the SequenceFormat for files being indexed
124   * @param sbFactory     the SequenceBuilderFactory used in building sequences
125   * @param symbolParser  the SymbolTokenization to use
126   * @throws IOException    if there was a problem writing the files
127   * @throws BioException   if any of the parameters were not acceptable
128   */
129  public TabIndexStore(
130    File storeFile,
131    File indexFile,
132    String name,
133    SequenceFormat format,
134    SequenceBuilderFactory sbFactory,
135    SymbolTokenization symbolParser
136  ) throws IOException, BioException {
137    if(storeFile.exists() || indexFile.exists()) {
138      throw new BioException("Files already exist: " + storeFile + " " + indexFile);
139    }
140
141    this.storeFile = storeFile.getAbsoluteFile();
142    this.indexFile = indexFile.getAbsoluteFile();
143    this.name = name;
144    this.format = format;
145    this.sbFactory = sbFactory;
146    this.symbolParser = symbolParser;
147
148    this.files = new HashSet();
149    this.seqFileIndex = new File[0];
150
151    this.commited = new HashMap();
152    this.uncommited = new HashMap();
153    this.idToIndex = new OverlayMap(commited, uncommited);
154
155    commit();
156  }
157
158  public void store(Index indx) throws IllegalIDException, BioException {
159    if(idToIndex.containsKey(indx.getID())) {
160      throw new IllegalIDException("ID already in use: '" + indx.getID() + "'");
161    }
162
163    addFile(indx.getFile());
164    uncommited.put(indx.getID(), indx);
165  }
166
167  public Index fetch(String id) throws IllegalIDException, BioException {
168    Index indx = (Index) idToIndex.get(id);
169
170    if(indx == null) {
171      throw new IllegalIDException("No Index known for id '" + id + "'");
172    }
173
174    return indx;
175  }
176
177  public void commit() throws BioException {
178    try {
179      PrintWriter out = new PrintWriter(
180        new FileWriter(
181          indexFile.toString(), true
182        )
183      );
184      for(Iterator i = uncommited.values().iterator(); i.hasNext(); ) {
185        Index indx = (Index) i.next();
186
187        out.println(
188          getFileIndex(indx.getFile()) + "\t" +
189          indx.getStart() + "\t" +
190          indx.getID()
191        );
192      }
193
194      commitStore();
195
196      out.close();
197
198      commited.putAll(uncommited);
199      uncommited.clear();
200    } catch (IOException ioe) {
201      throw new BioException("Failed to commit",ioe);
202    }
203  }
204
205  public void rollback() {
206    uncommited.clear();
207  }
208
209  public String getName() {
210    return name;
211  }
212
213  public Set getIDs() {
214    return Collections.unmodifiableSet(idToIndex.keySet());
215  }
216
217  public Set getFiles() {
218    return Collections.unmodifiableSet(files);
219  }
220
221  public SequenceFormat getFormat() {
222    return format;
223  }
224
225  public SequenceBuilderFactory getSBFactory() {
226    return sbFactory;
227  }
228
229  public SymbolTokenization getSymbolParser() {
230    return symbolParser;
231  }
232
233  protected void commitStore() throws IOException {
234    FileOutputStream fos = new FileOutputStream(storeFile);
235    ObjectOutputStream p = new ObjectOutputStream(fos);
236    p.writeObject(this);
237    p.flush();
238    fos.close();
239  }
240
241  protected void addFile(File f) {
242    if(!files.contains(f)) {
243      int len = seqFileIndex.length;
244      files.add(f);
245      File[] sfi = new File[len + 1];
246      System.arraycopy(this.seqFileIndex, 0, sfi, 0, len);
247      sfi[len] = f;
248      this.seqFileIndex = sfi;
249    }
250  }
251
252  protected int getFileIndex(File file) {
253    for(int pos = seqFileIndex.length-1; pos >= 0; pos--) {
254      File f = seqFileIndex[pos];
255       // don't know if this construct is faster than a plain equals()
256      if(f == file || file.equals(f)) {
257        return pos;
258      }
259    }
260
261    throw new IndexOutOfBoundsException("Index not found for File '" + file + "'");
262  }
263
264  protected void initialize() throws IOException {
265    if(indexFile.exists()) {
266      // load in stuff from the files
267      BufferedReader reader = new BufferedReader(
268        new FileReader(indexFile  )
269      );
270
271      for(
272        String line = reader.readLine();
273        line != null;
274        line = reader.readLine()
275      ) {
276        StringTokenizer stok = new StringTokenizer(line);
277        int fileNum = Integer.parseInt(stok.nextToken());
278        long start = Long.parseLong(stok.nextToken());
279        String id = stok.nextToken();
280
281        SimpleIndex index = new SimpleIndex(
282          seqFileIndex[fileNum],
283          start,
284          -1,
285          id
286        );
287
288        commited.put(id, index);
289      }
290    }
291  }
292
293  private void readObject(ObjectInputStream in)
294  throws IOException, ClassNotFoundException {
295    in.defaultReadObject();
296
297    this.commited = new HashMap();
298    this.uncommited = new HashMap();
299    this.idToIndex = new OverlayMap(commited, uncommited);
300
301    this.initialize();
302  }
303}