/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.program.indexdb;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.biojava.bio.AnnotationType;
import org.biojava.bio.BioException;
import org.biojava.bio.CardinalityConstraint;
import org.biojava.bio.PropertyConstraint;
import org.biojava.utils.AssertionFailure;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.CommitFailure;
import org.biojava.utils.SmallMap;
import org.biojava.utils.lsid.LifeScienceIdentifier;

/**
 * <code>BioStoreFactory</code> creates <code>BioStore</code>
 * instances. These are directory and file structures which index flat
 * files according to the OBDA specification.
 *
 * <p>Typical use: set the store location, store name, sequence
 * format and primary key, register every key namespace (including
 * the primary one) with {@link #addKey(String, int)}, then call
 * {@link #createBioStore()}.</p>
 *
 * @author Matthew Pocock
 * @author Keith James
 * @author Greg Cox
 */
public class BioStoreFactory {
    /**
     * <code>STORE_NAME</code> is the key used to identify the
     * arbitrary name of the store in the OBDA config.dat files.
     */
    public static final String STORE_NAME = "name";

    /**
     * <code>SEQUENCE_FORMAT</code> is the key used to identify the
     * format of the indexed sequence files represented by the store
     * in the OBDA config.dat files.
     */
    public static final String SEQUENCE_FORMAT = "format";

    /**
     * <code>PRIMARY_KEY_NAME</code> is the key used to identify the
     * primary namespace in the OBDA config.dat files.
     */
    public static final String PRIMARY_KEY_NAME = "primary_namespace";

    /**
     * <code>KEYS</code> is the key used to identify the secondary
     * namespaces in the OBDA config.dat files.
     */
    public static final String KEYS = "secondary_namespaces";

    /** Key of the index entry written to config.dat. */
    private static final String INDEX_KEY = "index";

    /** Value written under the index entry of config.dat. */
    private static final String INDEX_VALUE = "flat/1";

    /**
     * AnnotationType that all meta-data files should fit.
     */
    public static final AnnotationType META_DATA_TYPE;

    static {
        try {
            META_DATA_TYPE = new AnnotationType.Impl();
            META_DATA_TYPE.setDefaultConstraints(PropertyConstraint.ANY,
                                                 CardinalityConstraint.ANY);

            META_DATA_TYPE.setConstraints(PRIMARY_KEY_NAME,
                                          new PropertyConstraint.ByClass(String.class),
                                          CardinalityConstraint.ONE);

            META_DATA_TYPE.setConstraints(INDEX_KEY,
                                          new PropertyConstraint.ByClass(String.class),
                                          CardinalityConstraint.ONE);

            META_DATA_TYPE.setConstraints(SEQUENCE_FORMAT,
                                          new PropertyConstraint.ByClass(LifeScienceIdentifier.class),
                                          CardinalityConstraint.ONE);

            META_DATA_TYPE.setConstraints(KEYS,
                                          new PropertyConstraint.ByClass(String.class),
                                          CardinalityConstraint.ONE);

            META_DATA_TYPE.setConstraints(STORE_NAME,
                                          new PropertyConstraint.ByClass(String.class),
                                          CardinalityConstraint.ZERO_OR_ONE);
        } catch (Exception e) {
            // Any failure while building the constant is unrecoverable
            // for this class; surface it with the cause attached.
            throw new Error(e);
        }
    }

    private File storeLoc;
    private String primaryKey;
    // Maps key namespace name (String) to key byte length (Integer).
    private Map keys;
    private String name;
    private LifeScienceIdentifier format;

    /**
     * Creates a new <code>BioStoreFactory</code> with no keys
     * registered and no location, name, format or primary key set.
     */
    public BioStoreFactory() {
        keys = new SmallMap();
    }

    /**
     * <code>setStoreName</code> sets the name to be given to the new
     * index.
     *
     * @param name a <code>String</code>.
     */
    public void setStoreName(String name) {
        this.name = name;
    }

    /**
     * <code>getStoreName</code> returns the name to be given to the
     * new index.
     *
     * @return a <code>String</code>.
     */
    public String getStoreName() {
        return name;
    }

    /**
     * <code>setStoreLocation</code> sets the directory of the new
     * index.
     *
     * @param storeLoc a <code>File</code>.
     */
    public void setStoreLocation(File storeLoc) {
        this.storeLoc = storeLoc;
    }

    /**
     * <code>getStoreLocation</code> returns the directory of the new
     * index.
     *
     * @return a <code>File</code>.
     */
    public File getStoreLocation() {
        return storeLoc;
    }

    /**
     * <code>setSequenceFormat</code> sets the sequence format name
     * which will be indicated in the index.
     *
     * @param format a <code>LifeScienceIdentifier</code> which must
     * be one of those mandated by the OBDA flatfile indexing
     * specification.
     */
    public void setSequenceFormat(LifeScienceIdentifier format) {
        this.format = format;
    }

    /**
     * <code>getSequenceFormat</code> returns the current sequence
     * format name.
     *
     * @return a <code>LifeScienceIdentifier</code>.
     */
    public LifeScienceIdentifier getSequenceFormat() {
        return format;
    }

    /**
     * <code>setPrimaryKey</code> sets the primary identifier
     * namespace.
     *
     * @param primaryKey a <code>String</code>.
     */
    public void setPrimaryKey(String primaryKey) {
        this.primaryKey = primaryKey;
    }

    /**
     * <code>getPrimaryKey</code> returns the primary identifier
     * namespace.
     *
     * @return a <code>String</code>.
     */
    public String getPrimaryKey() {
        return primaryKey;
    }

    /**
     * <code>addKey</code> adds a new identifier namespace.
     *
     * @param keyName a <code>String</code>.
     * @param length an <code>int</code> indicating the byte length of
     * the key records.
     */
    public void addKey(String keyName, int length) {
        keys.put(keyName, Integer.valueOf(length));
    }

    /**
     * <code>getKeys</code> returns the names of all identifier
     * namespaces added so far, as the key set of the factory's
     * internal key map.
     *
     * @return a <code>Set</code> of <code>String</code> key names.
     */
    public Set getKeys() {
        return keys.keySet();
    }

    /**
     * <code>removeKey</code> removes the specified
     * key.
     *
     * @param keyName a <code>String</code>.
     */
    public void removeKey(String keyName) {
        keys.remove(keyName);
    }

    /**
     * <code>createBioStore</code> creates a <code>BioStore</code>
     * reflecting the current state of the factory and returns a
     * reference to it.
     *
     * <p>The store directory is created, a "config.dat" is written
     * with the index type, store name, sequence format, primary
     * namespace and tab-separated secondary namespaces, and one
     * index file is initialised per registered key.</p>
     *
     * @return a <code>BioStore</code>.
     *
     * @exception BioException if the store location is unset,
     * already exists or cannot be created, if the primary key has
     * not been added as a key, or if the name or format is unset.
     */
    public BioStore createBioStore()
        throws BioException {
        checkReadyToCreate();

        try {
            // The location did not exist a moment ago, so a false
            // return here means the directory could not be created.
            if (!storeLoc.mkdirs()) {
                throw new BioException("Could not create store location: "
                                       + storeLoc);
            }

            ConfigFile ann = new ConfigFile(makeConfigFile(storeLoc));
            ann.setProperty(INDEX_KEY, INDEX_VALUE);

            // database name
            ann.setProperty(STORE_NAME, name);
            // sequence format
            ann.setProperty(SEQUENCE_FORMAT, format.toString());
            // primary key data
            ann.setProperty(PRIMARY_KEY_NAME, primaryKey);

            // Tab-separated list of the secondary namespaces only.
            StringBuffer keyList = new StringBuffer();

            for (Iterator ki = keys.keySet().iterator(); ki.hasNext(); ) {
                String key = (String) ki.next();
                int length = ((Integer) keys.get(key)).intValue();

                // The list objects are constructed and discarded:
                // presumably construction initialises the backing
                // file -- confirm against PrimaryIDList and
                // SecondaryFileAsList.
                if (key.equals(primaryKey)) {
                    new PrimaryIDList(makePrimaryKeyFile(storeLoc, key),
                                      calculatePrimRecLen(length),
                                      null);
                } else {
                    new SecondaryFileAsList(makeSecondaryFile(storeLoc, key),
                                            calculateSecRecLen(length, primaryKey, keys));

                    if (keyList.length() != 0) {
                        keyList.append("\t");
                    }
                    keyList.append(key);
                }
            }

            ann.setProperty(KEYS, keyList.toString());
            ann.commit();

            return new BioStore(storeLoc, true, true);
        } catch (ChangeVetoException cve) {
            throw new AssertionFailure("Assertion Failure: Can't update annotation", cve);
        } catch (IOException ioe) {
            throw new AssertionFailure("Could not initialize store", ioe);
        } catch (CommitFailure cf) {
            throw new AssertionFailure("Could not commit store", cf);
        }
    }

    /**
     * Verifies that the factory holds everything needed to create a
     * store, before anything is written to disk.
     *
     * @exception BioException describing the first unmet
     * requirement.
     */
    private void checkReadyToCreate()
        throws BioException {
        if (storeLoc == null) {
            throw new BioException("Store location not set");
        }

        if (storeLoc.exists()) {
            throw new BioException("Store location already exists."
                                   + " Delete first: " + storeLoc);
        }

        if (!keys.containsKey(primaryKey)) {
            throw new BioException("Primary key is not listed as a key: "
                                   + primaryKey);
        }

        if (name == null) {
            throw new BioException("Store does not have a name set");
        }

        if (format == null) {
            throw new BioException("Format not set");
        }
    }

    /**
     * <code>makeConfigFile</code> returns a file which represents an
     * OBDA "config.dat" in the specified index directory.
     *
     * @param storeLoc a <code>File</code> indicating the index
     * directory.
     *
     * @return a <code>File</code> representing "config.dat".
     *
     * @exception IOException if an error occurs.
     */
    public static File makeConfigFile(File storeLoc)
        throws IOException {
        return new File(storeLoc, "config.dat");
    }

    /**
     * <code>makePrimaryKeyFile</code> returns a file which represents
     * an OBDA "key_&lt;primary namespace&gt;.key" primary key file on
     * the specified index directory.
     *
     * @param storeLoc a <code>File</code> indicating the parent path.
     * @param key a <code>String</code> primary key namespace.
     *
     * @return a <code>File</code> representing a "key_&lt;primary
     * namespace&gt;.key".
     *
     * @exception IOException if an error occurs.
     */
    public static File makePrimaryKeyFile(File storeLoc, String key)
        throws IOException {
        return new File(storeLoc, "key_" + key + ".key");
    }

    /**
     * <code>makeSecondaryFile</code> returns a file which represents
     * an OBDA "id_&lt;secondary namespace&gt;.index" secondary key
     * file on the specified index directory.
     *
     * @param storeLoc a <code>File</code> indicating the parent path.
     * @param key a <code>String</code> secondary key namespace.
     *
     * @return a <code>File</code> representing an "id_&lt;secondary
     * namespace&gt;.index" file.
     *
     * @exception IOException if an error occurs.
     */
    public static File makeSecondaryFile(File storeLoc, String key)
        throws IOException {
        return new File(storeLoc, "id_" + key + ".index");
    }

    /**
     * <code>calculatePrimRecLen</code> calculates the byte length of
     * primary namespace records: the ID, a 4 byte file id, an offset
     * as wide as the decimal form of <code>Long.MAX_VALUE</code> and
     * a length as wide as the decimal form of
     * <code>Integer.MAX_VALUE</code>, separated by tabs.
     *
     * @param idLen an <code>int</code> the number of bytes required
     * to hold the primary namespace ID.
     *
     * @return an <code>int</code> record length in bytes.
     */
    public static int calculatePrimRecLen(int idLen) {
        return
            idLen +                                       // space for ids
            "\t".length() +
            4 +                                           // file id
            "\t".length() +
            String.valueOf(Long.MAX_VALUE).length() +     // offset
            "\t".length() +
            String.valueOf(Integer.MAX_VALUE).length();   // length
    }

    /**
     * <code>calculateSecRecLen</code> calculates the byte length of
     * secondary namespace records: the secondary ID, a tab, and the
     * primary ID.
     *
     * @param idLen an <code>int</code> the number of bytes required
     * to hold the secondary namespace ID.
     *
     * @param primaryKey a <code>String</code> the primary namespace
     * ID.
     * @param keys a <code>Map</code> of key names to their byte
     * lengths; it must contain an entry for <code>primaryKey</code>.
     *
     * @return an <code>int</code> record length in bytes.
     *
     * @exception IllegalArgumentException if <code>keys</code> has
     * no length recorded for <code>primaryKey</code>.
     */
    public static int calculateSecRecLen(int idLen, String primaryKey, Map keys) {
        Integer primLength = (Integer) keys.get(primaryKey);
        if (primLength == null) {
            throw new IllegalArgumentException(
                "No key length recorded for primary key: " + primaryKey);
        }

        return
            idLen +
            "\t".length() +
            primLength.intValue();
    }
}