/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.db;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.io.SequenceBuilderFactory;
import org.biojava.bio.seq.io.SequenceFormat;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.utils.AssertionFailure;
import org.biojava.utils.OverlayMap;

/**
 * <p>
 * Implements IndexStore as a serialized file for the java data and a
 * tab-delimited file of offsets.
 * </p>
 *
 * <p>
 * Use the constructor to create a new index store. Use the static factory
 * method open() to load an existing store.
 * </p>
 *
 * The tab-delimited file looks like:
 * <pre>
 * fileNumber \t offset \t id \n
 * </pre>
 *
 * <p>
 * Note for maintainers: this class is Serializable but declares no
 * serialVersionUID, so the default one is derived from the shape of the
 * class (its non-private members among other things). Adding non-private
 * members may therefore make previously written store files unreadable.
 * </p>
 *
 * @author Matthew Pocock
 * @author Thomas Down
 * @author Keith James
 * @author David Huen
 */
public class TabIndexStore implements IndexStore, Serializable {
  /**
   * Open an existing index store.
   *
   * @param storeFile  the File encapsulating the store
   * @return a new TabIndexStore for that file
   * @throws IOException if the storeFile could not be processed
   */
  public static TabIndexStore open(File storeFile)
  throws IOException {
    FileInputStream fis = new FileInputStream(storeFile);
    try {
      ObjectInputStream p = new ObjectInputStream(fis);
      return (TabIndexStore) p.readObject();
    } catch (ClassNotFoundException cnfe) {
      throw new AssertionFailure("Assertion Failure: How did we get here?", cnfe);
    } finally {
      // release the file handle even when deserialization fails
      fis.close();
    }
  }


  // internal book-keeping for indices; rebuilt from the index file on
  // deserialization (see readObject), hence transient
  private transient Map idToIndex;
  private transient Map commited;
  private transient Map uncommited;

  // the two files for storing the store info and the actual table of indices
  private final File storeFile;
  private final File indexFile;

  private final String name;

  // the sequence files seen so far, and the same files in the order they
  // were added; the position in seqFileIndex is the fileNumber column of
  // the index file
  private final Set files;
  private File[] seqFileIndex;

  private final SequenceFormat format;
  private final SequenceBuilderFactory sbFactory;
  private final SymbolTokenization symbolParser;

  /**
   * Create a new TabIndexStore.
   *
   * <p>
   * The store file and index file must not exist. This is to prevent you from
   * accidentally destroying an existing index.
   * </p>
   *
   * <p>
   * Both files are created immediately, as the constructor finishes by
   * calling commit().
   * </p>
   *
   * @param storeFile  the file that will be used to persist this index store
   * @param indexFile  the file that will hold the actual indices
   * @param name  the name that will be used by the database backed by
   *        this index
   * @param format  the SequenceFormat for files being indexed
   * @param sbFactory the SequenceBuilderFactory used in building sequences
   * @param symbolParser  the SymbolTokenization to use
   * @throws IOException if there was a problem writing the files
   * @throws BioException if any of the parameters were not acceptable
   */
  public TabIndexStore(
    File storeFile,
    File indexFile,
    String name,
    SequenceFormat format,
    SequenceBuilderFactory sbFactory,
    SymbolTokenization symbolParser
  ) throws IOException, BioException {
    if(storeFile.exists() || indexFile.exists()) {
      throw new BioException("Files already exist: " + storeFile + " " + indexFile);
    }

    this.storeFile = storeFile.getAbsoluteFile();
    this.indexFile = indexFile.getAbsoluteFile();
    this.name = name;
    this.format = format;
    this.sbFactory = sbFactory;
    this.symbolParser = symbolParser;

    this.files = new HashSet();
    this.seqFileIndex = new File[0];

    this.commited = new HashMap();
    this.uncommited = new HashMap();
    this.idToIndex = new OverlayMap(commited, uncommited);

    commit();
  }

  /**
   * Record an index as uncommitted and register the file it refers to.
   * Nothing is written to disk until commit() is called.
   *
   * @param indx  the Index to add
   * @throws IllegalIDException if the ID of indx is already known to this
   *         store
   * @throws BioException declared by the interface; not thrown directly here
   */
  public void store(Index indx) throws IllegalIDException, BioException {
    if(idToIndex.containsKey(indx.getID())) {
      throw new IllegalIDException("ID already in use: '" + indx.getID() + "'");
    }

    addFile(indx.getFile());
    uncommited.put(indx.getID(), indx);
  }

  /**
   * Look up the Index stored under an ID.
   *
   * @param id  the ID to look up
   * @return the Index registered for id
   * @throws IllegalIDException if no Index is known for id
   * @throws BioException declared by the interface; not thrown directly here
   */
  public Index fetch(String id) throws IllegalIDException, BioException {
    Index indx = (Index) idToIndex.get(id);

    if(indx == null) {
      throw new IllegalIDException("No Index known for id '" + id + "'");
    }

    return indx;
  }

  /**
   * Append every uncommitted index to the index file, re-write the store
   * file, and then move the uncommitted entries to the committed map.
   *
   * @throws BioException if either file could not be written
   */
  public void commit() throws BioException {
    try {
      PrintWriter out = new PrintWriter(
        new FileWriter(
          indexFile.toString(), true
        )
      );
      try {
        for(Iterator i = uncommited.values().iterator(); i.hasNext(); ) {
          Index indx = (Index) i.next();

          out.println(
            getFileIndex(indx.getFile()) + "\t" +
            indx.getStart() + "\t" +
            indx.getID()
          );
        }

        // PrintWriter swallows IOExceptions; checkError() flushes the
        // buffered lines and is the only way to learn that a write failed
        if(out.checkError()) {
          throw new IOException("Failed to write index file " + indexFile);
        }

        commitStore();
      } finally {
        // always release the file handle, also when commitStore() or
        // getFileIndex() throws
        out.close();
      }

      // only reached when both files were written successfully
      commited.putAll(uncommited);
      uncommited.clear();
    } catch (IOException ioe) {
      throw new BioException("Failed to commit",ioe);
    }
  }

  /**
   * Discard all uncommitted indices. Files registered through addFile()
   * while storing them are not removed.
   */
  public void rollback() {
    uncommited.clear();
  }

  /**
   * @return the name given to this store at construction
   */
  public String getName() {
    return name;
  }

  /**
   * @return an unmodifiable view of the IDs known to this store
   */
  public Set getIDs() {
    return Collections.unmodifiableSet(idToIndex.keySet());
  }

  /**
   * @return an unmodifiable view of the files registered with this store
   */
  public Set getFiles() {
    return Collections.unmodifiableSet(files);
  }

  /**
   * @return the SequenceFormat given to this store at construction
   */
  public SequenceFormat getFormat() {
    return format;
  }

  /**
   * @return the SequenceBuilderFactory given to this store at construction
   */
  public SequenceBuilderFactory getSBFactory() {
    return sbFactory;
  }

  /**
   * @return the SymbolTokenization given to this store at construction
   */
  public SymbolTokenization getSymbolParser() {
    return symbolParser;
  }

  /**
   * Serialize this object to the store file, replacing its previous content.
   *
   * @throws IOException if the store file could not be written
   */
  protected void commitStore() throws IOException {
    FileOutputStream fos = new FileOutputStream(storeFile);
    try {
      ObjectOutputStream p = new ObjectOutputStream(fos);
      p.writeObject(this);
      p.flush();
    } finally {
      // release the file handle even when serialization fails
      fos.close();
    }
  }

  /**
   * Register a sequence file. A file that is already registered is ignored;
   * a new file is appended to seqFileIndex so that the positions of earlier
   * files never change.
   *
   * @param f  the File to register
   */
  protected void addFile(File f) {
    if(!files.contains(f)) {
      int len = seqFileIndex.length;
      files.add(f);
      File[] sfi = new File[len + 1];
      System.arraycopy(this.seqFileIndex, 0, sfi, 0, len);
      sfi[len] = f;
      this.seqFileIndex = sfi;
    }
  }

  /**
   * Find the position of a file in seqFileIndex, searching from the most
   * recently added file backwards.
   *
   * @param file  the File to look for
   * @return the position of file in seqFileIndex
   * @throws IndexOutOfBoundsException if file has not been registered
   */
  protected int getFileIndex(File file) {
    for(int pos = seqFileIndex.length-1; pos >= 0; pos--) {
      File f = seqFileIndex[pos];
      // don't know if this construct is faster than a plain equals()
      if(f == file || file.equals(f)) {
        return pos;
      }
    }

    throw new IndexOutOfBoundsException("Index not found for File '" + file + "'");
  }

  /**
   * Load the committed indices from the index file, if it exists. Each line
   * is expected to hold fileNumber, offset and id separated by tabs, as
   * written by commit().
   *
   * @throws IOException if the index file could not be read
   */
  protected void initialize() throws IOException {
    if(indexFile.exists()) {
      // load in stuff from the files
      BufferedReader reader = new BufferedReader(
        new FileReader(indexFile )
      );

      try {
        for(
          String line = reader.readLine();
          line != null;
          line = reader.readLine()
        ) {
          // split on tabs only: commit() separates the columns with tabs,
          // and splitting on any whitespace would truncate an id that
          // contains a space
          StringTokenizer stok = new StringTokenizer(line, "\t");
          int fileNum = Integer.parseInt(stok.nextToken());
          long start = Long.parseLong(stok.nextToken());
          String id = stok.nextToken();

          // the length is not stored in the index file, hence -1
          SimpleIndex index = new SimpleIndex(
            seqFileIndex[fileNum],
            start,
            -1,
            id
          );

          commited.put(id, index);
        }
      } finally {
        reader.close();
      }
    }
  }

  /**
   * Deserialization hook: restores the serialized fields, recreates the
   * transient maps and re-reads the committed indices from the index file.
   */
  private void readObject(ObjectInputStream in)
  throws IOException, ClassNotFoundException {
    in.defaultReadObject();

    this.commited = new HashMap();
    this.uncommited = new HashMap();
    this.idToIndex = new OverlayMap(commited, uncommited);

    this.initialize();
  }
}