/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.program.indexdb;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.biojava.bio.Annotation;
import org.biojava.bio.BioException;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.CommitFailure;
import org.biojava.utils.SmallMap;
import org.biojava.utils.io.RAF;

/**
 * <code>BioStore</code>s represent directory and file structures
 * which index flat files according to the OBDA specification. The
 * preferred method of constructing new instances is to use
 * <code>BioStoreFactory</code>.
 *
 * @author Matthew Pocock
 * @author Keith James
 */
public class BioStore implements IndexStore {

    /**
     * <code>STRING_CASE_SENSITIVE_ORDER</code> compares two
     * <code>Object</code>s, which must both be <code>String</code>s,
     * lexicographically using <code>compareTo</code>. The comparison
     * is carried out 'a' to 'b'.
     */
    static Comparator STRING_CASE_SENSITIVE_ORDER = new Comparator() {
            public int compare(Object a, Object b) {
                return ((Comparable) a).compareTo(b);
            }
        };

    /** Prefix of the config properties that describe indexed files. */
    private static final String FILE_ID_PREFIX = "fileid_";

    private ConfigFile metaData;
    private File location;
    private String primaryKey;
    // Maps a secondary key name to the list backing its index file.
    private Map idToList;
    // Indexed by file ID; only the first fileCount slots are in use.
    private RAF[] fileIDToRAF;
    private SearchableList primaryList;
    // Number of file IDs in use, i.e. one more than the highest ID.
    private int fileCount;

    /**
     * Creates a new <code>BioStore</code> flatfile index at the
     * specified location with the specified caching behaviour.
     *
     * @param location a <code>File</code> indicating the index
     * directory.
     * @param cache a <code>boolean</code> indicating whether the
     * implementation should cache its state.
     *
     * @exception IOException if an error occurs.
     * @exception BioException if an error occurs.
     */
    public BioStore(File location, boolean cache)
        throws IOException, BioException {
        this(location, cache, false);
    }

    /**
     * Creates a new <code>BioStore</code> over the index at the
     * specified location. The config file is read, the primary and
     * secondary key lists are opened (wrapped in
     * <code>CacheList</code>s when <code>cache</code> is true) and
     * the indexed files recorded in the config are opened.
     *
     * @param location a <code>File</code> indicating the index
     * directory.
     * @param cache a <code>boolean</code> indicating whether the
     * key lists should be wrapped in <code>CacheList</code>s.
     * @param mutable a <code>boolean</code> which is passed on to
     * the primary and secondary key lists.
     *
     * @exception IOException if an error occurs.
     * @exception BioException if the config file does not exist or
     * the index metadata is inconsistent with the indexed files.
     */
    BioStore(File location, boolean cache, boolean mutable)
        throws IOException, BioException {
        this.location = location;

        File configFile = BioStoreFactory.makeConfigFile(location);
        if (!configFile.exists()) {
            throw new BioException("Config file does not exist: "
                                   + configFile);
        }
        metaData = new ConfigFile(configFile);
        idToList = new SmallMap();

        primaryKey = (String) metaData.getProperty(BioStoreFactory.PRIMARY_KEY_NAME);
        String keyList = (String) metaData.getProperty(BioStoreFactory.KEYS);

        File plFile = BioStoreFactory.makePrimaryKeyFile(location, primaryKey);
        if (cache) {
            primaryList = new CacheList(new PrimaryIDList(plFile, this, mutable));
        } else {
            primaryList = new PrimaryIDList(plFile, this, mutable);
        }

        // The secondary key names are stored as one tab-separated value
        StringTokenizer sTok = new StringTokenizer(keyList, "\t");
        while (sTok.hasMoreTokens()) {
            String k = sTok.nextToken();

            File file = BioStoreFactory.makeSecondaryFile(location, k);
            if (cache) {
                idToList.put(k, new CacheList(new SecondaryFileAsList(file, mutable)));
            } else {
                idToList.put(k, new SecondaryFileAsList(file, mutable));
            }
        }

        readFileIDs();
    }

    /**
     * The name of this store or null if the name has not been set.
     *
     * @return a <code>String</code> name, or null.
     */
    public String getName() {
        if (metaData.containsProperty(BioStoreFactory.STORE_NAME)) {
            return (String) metaData.getProperty(BioStoreFactory.STORE_NAME);
        } else {
            return null;
        }
    }

    /**
     * <code>getLocation</code> returns the directory where the index
     * is located.
     *
     * @return a <code>File</code>.
     */
    public File getLocation() {
        return location;
    }

    /**
     * <code>readFileIDs</code> rebuilds the file ID table from the
     * <code>fileid_N</code> properties of the config file. Each
     * property value is a file path and the file length recorded at
     * indexing time, separated by a tab. Each file is opened read-only
     * and stored at index N.
     *
     * @exception IOException if a file cannot be opened.
     * @exception BioException if a property value is malformed or a
     * file no longer has its recorded length.
     */
    private void readFileIDs()
        throws IOException, BioException {
        fileIDToRAF = new RAF[5];
        fileCount = 0;

        for (Iterator i = metaData.keys().iterator(); i.hasNext(); ) {
            String key = (String) i.next();
            if (!key.startsWith(FILE_ID_PREFIX)) {
                continue;
            }

            int indx = Integer.parseInt(key.substring(FILE_ID_PREFIX.length()));
            String fileLine = (String) metaData.getProperty(key);
            int tab = fileLine.indexOf("\t");
            if (tab < 0) {
                // Without the separator neither path nor length can be
                // recovered; report it rather than fail in substring()
                throw new BioException("Malformed file entry for "
                                       + key + ": " + fileLine);
            }

            File file = new File(fileLine.substring(0, tab));
            RAF raf = new RAF(file, "r");
            long length = Long.parseLong(fileLine.substring(tab + 1));

            if (file.length() != length) {
                throw new BioException("File changed length: " + file);
            }

            if (indx >= fileCount) {
                // beyond end
                if (indx >= fileIDToRAF.length) {
                    // beyond array end
                    RAF[] tmpr = new RAF[indx + 1];
                    System.arraycopy(fileIDToRAF, 0, tmpr, 0, fileIDToRAF.length);
                    fileIDToRAF = tmpr;
                }

                // fileCount is a count, so it must be one past the
                // highest ID seen; otherwise the last file is skipped by
                // writeFileIDs and its slot reused by getIDForFile
                fileCount = indx + 1;
            }
            fileIDToRAF[indx] = raf;
        }
    }

    /**
     * <code>writeFileIDs</code> records the path and current length
     * of every known file in the config as <code>fileid_N</code>
     * properties, in the format read by <code>readFileIDs</code>.
     *
     * @exception BioException if an error occurs.
     * @exception IOException if a file length cannot be obtained.
     * @exception ChangeVetoException if the config rejects a
     * property change.
     */
    private void writeFileIDs()
        throws BioException, IOException, ChangeVetoException {
        for (int i = 0; i < fileCount; i++) {
            RAF file = fileIDToRAF[i];
            if (file == null) {
                // Slot left empty because the IDs read from the config
                // were not contiguous; there is nothing to record
                continue;
            }

            long length = file.length();
            String prop = FILE_ID_PREFIX + i;
            String val = file.getFile().toString() + "\t" + length;
            metaData.setProperty(prop, val);
        }
    }

    /**
     * <code>getFileForID</code> returns the file stored under the
     * specified ID. No range check is made beyond that of the
     * underlying array.
     *
     * @param fileId an <code>int</code> file ID.
     *
     * @return a <code>RAF</code>, which may be null if the slot is
     * unused.
     */
    RAF getFileForID(int fileId) {
        return fileIDToRAF[fileId];
    }

    /**
     * <code>getIDForFile</code> returns the ID of the specified
     * file. If the file is not yet known it is appended to the table
     * and given the next free ID.
     *
     * @param file a <code>RAF</code>.
     *
     * @return an <code>int</code> file ID.
     *
     * @exception IOException if an error occurs.
     */
    int getIDForFile(RAF file)
        throws IOException {
        // scan list
        for (int i = 0; i < fileCount; i++) {
            if (file.equals(fileIDToRAF[i])) {
                return i;
            }
        }

        // extend fileIDToRAF array
        if (fileCount >= fileIDToRAF.length) {
            RAF[] tmpr = new RAF[fileIDToRAF.length + 4]; // 4 is magic number
            System.arraycopy(fileIDToRAF, 0, tmpr, 0, fileCount);
            fileIDToRAF = tmpr;
        }

        // add the unseen file to the list
        fileIDToRAF[fileCount] = file;
        return fileCount++;
    }

    /**
     * <code>getMetaData</code> returns the contents of the index
     * config file.
     *
     * @return an <code>Annotation</code>.
     */
    public Annotation getMetaData() {
        return metaData;
    }

    /**
     * <code>get</code> searches the primary key list for the
     * specified ID.
     *
     * @param id a <code>String</code> primary ID.
     *
     * @return the <code>Record</code> returned by the search.
     */
    public Record get(String id) {
        return (Record) primaryList.search(id);
    }

    /**
     * <code>get</code> searches for the specified ID in the
     * specified namespace. If the namespace is the primary key the
     * primary list is searched directly. Otherwise the secondary
     * list for the namespace is searched and each hit is resolved
     * through the primary list.
     *
     * @param id a <code>String</code> ID.
     * @param namespace a <code>String</code> key name.
     *
     * @return a <code>List</code> of the search results.
     *
     * @exception BioException if the namespace is neither the
     * primary key nor one of the indexed secondary keys.
     */
    public List get(String id, String namespace)
        throws BioException {
        List hits = new ArrayList();
        if (namespace.equals(primaryKey)) {
            hits.add(primaryList.search(id));
        } else {
            Object indexed = idToList.get(namespace);
            if (indexed == null) {
                throw new BioException("No index for namespace: " + namespace);
            }

            // NOTE(review): when the store was created with cache == true
            // the constructor stores CacheList wrappers in idToList; this
            // cast assumes they are assignable to SecondaryFileAsList --
            // TODO confirm against CacheList
            SecondaryFileAsList secList = (SecondaryFileAsList) indexed;
            List kpList = secList.searchAll(id);
            for (Iterator i = kpList.iterator(); i.hasNext(); ) {
                KeyPair keyPair = (KeyPair) i.next();
                hits.add(primaryList.search(keyPair.getSecondary()));
            }
        }

        return hits;
    }

    /**
     * <code>writeRecord</code> adds a record to the primary list
     * and, for every secondary key in <code>secIDs</code>, adds a
     * pair of each secondary value and the primary ID to that key's
     * list.
     *
     * @param file a <code>RAF</code> containing the record.
     * @param offset a <code>long</code> passed to the new record.
     * @param length an <code>int</code> passed to the new record.
     * @param id a <code>String</code> primary ID.
     * @param secIDs a <code>Map</code> of secondary key name to
     * <code>Collection</code> of <code>String</code> values. Every
     * key must be one of the secondary keys of this store.
     */
    public void writeRecord(RAF file,
                            long offset,
                            int length,
                            String id,
                            Map secIDs) {
        primaryList.add(new Record.Impl(id, file, offset, length));

        for (Iterator mei = secIDs.entrySet().iterator(); mei.hasNext(); ) {
            Map.Entry me = (Map.Entry) mei.next();
            String sid = (String) me.getKey();
            List sfl = (List) idToList.get(sid);
            Collection svals = (Collection) me.getValue();
            for (Iterator i = svals.iterator(); i.hasNext(); ) {
                String sval = (String) i.next();
                sfl.add(new KeyPair.Impl(sval, id));
            }
        }
    }

    /**
     * <code>getRecordList</code> returns all the <code>Record</code>s
     * in the index.
     *
     * @return a <code>List</code> of <code>Record</code>s.
     */
    public List getRecordList() {
        return primaryList;
    }

    /**
     * <code>commit</code> writes an index to disk. The primary and
     * secondary lists are each sorted with their own comparator and
     * committed, then the file table is written to the config, which
     * is committed last.
     *
     * @exception CommitFailure if an error occurs.
     */
    public void commit()
        throws CommitFailure {
        Collections.sort(primaryList, primaryList.getComparator());
        primaryList.commit();
        for (Iterator i = idToList.values().iterator(); i.hasNext(); ) {
            SearchableList fal = (SearchableList) i.next();
            Collections.sort(fal, fal.getComparator());
            fal.commit();
        }

        try {
            writeFileIDs();
        } catch (ChangeVetoException cve) {
            throw new CommitFailure(cve);
        } catch (IOException ioe) {
            throw new CommitFailure(ioe);
        } catch (BioException be) {
            throw new CommitFailure(be);
        }

        metaData.commit();
    }
}