001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.indexdb;
023
024import java.io.File;
025import java.io.IOException;
026import java.util.Iterator;
027import java.util.Map;
028import java.util.Set;
029
030import org.biojava.bio.AnnotationType;
031import org.biojava.bio.BioException;
032import org.biojava.bio.CardinalityConstraint;
033import org.biojava.bio.PropertyConstraint;
034import org.biojava.utils.AssertionFailure;
035import org.biojava.utils.ChangeVetoException;
036import org.biojava.utils.CommitFailure;
037import org.biojava.utils.SmallMap;
038import org.biojava.utils.lsid.LifeScienceIdentifier;
039
040/**
041 * <code>BioStoreFactory</code> creates <code>BioStore</code>
042 * instances. These are directory and file structures which index flat
043 * files according to the OBDA specification.
044 *
045 * @author Matthew Pocock
046 * @author Keith James
047 * @author Greg Cox
048 */
049public class BioStoreFactory {
050    /**
051     * <code>STORE_NAME</code> is the key used to identify the
052     * arbitrary name of the store in the OBDA config.dat files.
053     */
054    public static final String STORE_NAME = "name";
055
056    /**
057     * <code>SEQUENCE_FORMAT</code> is the key used to identify the
058     * format of the indexed sequence files represented by the store
059     * in the OBDA config.dat files.
060     */
061    public static final String SEQUENCE_FORMAT = "format";
062
063    /**
064     * <code>PRIMARY_KEY_NAME</code> is the key used to identify the
065     * primary namespace in the OBDA config.dat files.
066     */
067    public static final String PRIMARY_KEY_NAME = "primary_namespace";
068
069    /**
070     * <code>KEYS</code> is the key used to identify the secondary
071     * namespaces in the OBDA config.dat files.
072     */
073    public static final String KEYS = "secondary_namespaces";
074
075    /**
076     * AnnotationType that all meta-data files should fit.
077     */
078    public static final AnnotationType META_DATA_TYPE;
079
080    static {
081        try {
082            META_DATA_TYPE = new AnnotationType.Impl();
083            META_DATA_TYPE.setDefaultConstraints(PropertyConstraint.ANY,
084                                                 CardinalityConstraint.ANY);
085        
086            META_DATA_TYPE.setConstraints(BioStoreFactory.PRIMARY_KEY_NAME,
087                                          new PropertyConstraint.ByClass(String.class),
088                                          CardinalityConstraint.ONE);
089
090            META_DATA_TYPE.setConstraints("index",
091                                          new PropertyConstraint.ByClass(String.class),
092                                          CardinalityConstraint.ONE);
093
094            META_DATA_TYPE.setConstraints("format",
095                                          new PropertyConstraint.ByClass(LifeScienceIdentifier.class),
096                                          CardinalityConstraint.ONE);
097
098            META_DATA_TYPE.setConstraints(BioStoreFactory.KEYS,
099                                          new PropertyConstraint.ByClass(String.class),
100                                          CardinalityConstraint.ONE);
101
102            META_DATA_TYPE.setConstraints("name",
103                                          new PropertyConstraint.ByClass(String.class),
104                                          CardinalityConstraint.ZERO_OR_ONE);
105        } catch (Exception e) {
106            throw new Error(e);
107        }
108    }
109
110    private File storeLoc;
111    private String primaryKey;
112    private Map keys;
113    private String name;
114    private LifeScienceIdentifier format;
115
116    /**
117     * Creates a new <code>BioStoreFactory</code>.
118     */
119    public BioStoreFactory() {
120        keys = new SmallMap();
121    }
122
123    /**
124     * <code>setStoreName</code> sets the name to be given to the new
125     * index.
126     *
127     * @param name a <code>String</code>.
128     */
129    public void setStoreName(String name) {
130        this.name = name;
131    }
132
133    /**
134     * <code>getStoreName</code> returns the name to be given to the
135     * new index.
136     *
137     * @return a <code>String</code>.
138     */
139    public String getStoreName() {
140        return name;
141    }
142
143    /**
144     * <code>setStoreLocation</code> sets the directory of the new
145     * index.
146     *
147     * @param storeLoc a <code>File</code>.
148     */
149    public void setStoreLocation(File storeLoc) {
150        this.storeLoc = storeLoc;
151    }
152
153    /**
154     * <code>getStoreLocation</code> returns the directory of the bew
155     * index.
156     *
157     * @return a <code>File</code>.
158     */
159    public File getStoreLocation() {
160        return storeLoc;
161    }
162
163    /**
164     * <code>setSequenceFormat</code> sets the sequence format name
165     * which will be indicated in the index.
166     *
167     * @param format a <code>LifeScienceIdentifier</code> which must
168     * be one of those mandated by the OBDA flatfile indexing
169     * specification.
170     */
171    public void setSequenceFormat(LifeScienceIdentifier format) {
172        this.format = format;
173    }
174
175    /**
176     * <code>getSequenceFormat</code> returns the current sequence
177     * format name.
178     *
179     * @return a <code>LifeScienceIdentifier</code>.
180     */
181    public LifeScienceIdentifier getSequenceFormat()
182    {
183        return format;
184    }
185
186    /**
187     * <code>setPrimaryKey</code> sets the primary identifier
188     * namespace.
189     *
190     * @param primaryKey a <code>String</code>.
191     */
192    public void setPrimaryKey(String primaryKey) {
193        this.primaryKey = primaryKey;
194    }
195
196    /**
197     * <code>getPrimaryKey</code> returns the primary identifier
198     * namespace.
199     *
200     * @return a <code>String</code>.
201     */
202    public String getPrimaryKey() {
203        return primaryKey;
204    }
205
206    /**
207     * <code>addKey</code> adds a new identifier namespace.
208     *
209     * @param keyName a <code>String</code>.
210     * @param length an <code>int</code> indicating the byte length of
211     * the key records.
212     */
213    public void addKey(String keyName, int length) {
214        keys.put(keyName, new Integer(length));
215    }
216
217    public Set getKeys() {
218      return keys.keySet();
219    }
220
221    /**
222     * <code>removeKey</code> removes the specified
223     * key.
224     *
225     * @param keyName a <code>String</code>.
226     */
227    public void removeKey(String keyName) {
228        keys.remove(keyName);
229    }
230
231    /**
232     * <code>createBioStore</code> creates a <code>BioStore</code>
233     * reflecting the current state of the factory and returns a
234     * reference to it.
235     *
236     * @return a <code>BioStore</code>.
237     *
238     * @exception BioException if an error occurs.
239     */
240    public BioStore createBioStore()
241        throws BioException {
242        try {
243            if (storeLoc.exists()) {
244                throw new BioException("Store location already exists."
245                                       + " Delete first: " + storeLoc);
246            }
247
248            if (!keys.containsKey(primaryKey)) {
249                throw new BioException("Primary key is not listed as a key: "
250                                       + primaryKey);
251            }
252
253            if (name == null) {
254              throw new BioException("Store does not have a anme set");
255            }
256
257            if (format == null) {
258              throw new BioException("Format not set");
259            }
260
261            storeLoc.mkdirs();
262            ConfigFile ann = new ConfigFile(makeConfigFile(storeLoc));
263            ann.setProperty("index", "flat/1");
264
265            // database name
266            ann.setProperty(STORE_NAME, name);
267            // sequence format
268            ann.setProperty(SEQUENCE_FORMAT, format.toString());
269            // primary key data
270            ann.setProperty(PRIMARY_KEY_NAME, primaryKey);
271
272            StringBuffer keyList = new StringBuffer();
273
274            // other keys data
275            for (Iterator ki = keys.keySet().iterator(); ki.hasNext(); ) {
276                String key = (String) ki.next();
277                int length = ((Integer) keys.get(key)).intValue();
278
279                if (key.equals(primaryKey)) {
280                    new PrimaryIDList(makePrimaryKeyFile(storeLoc, key),
281                                      calculatePrimRecLen(length),
282                                      null);
283                } else {
284                    new SecondaryFileAsList(makeSecondaryFile(storeLoc, key),
285                                            calculateSecRecLen(length, primaryKey, keys));
286
287                    if (keyList.length() != 0) {
288                        keyList.append("\t");
289                    }
290                    keyList.append(key);
291                }
292            }
293
294            ann.setProperty(KEYS, keyList.substring(0));
295            ann.commit();
296
297            BioStore bStore = new BioStore(storeLoc, true, true);
298
299            return bStore;
300        } catch (ChangeVetoException cve) {
301            throw new AssertionFailure("Assertion Failure: Can't update annotation", cve);
302        } catch (IOException ioe) {
303            throw new AssertionFailure("Could not initialize store", ioe);
304        } catch (CommitFailure cf) {
305          throw new AssertionFailure("Could not commit store", cf);
306        }
307    }
308
309    /**
310     * <code>makeConfigFile</code> returns a file which represents an
311     * OBDA "config.dat" in the specified index directory.
312     *
313     * @param storeLoc a <code>File</code> indicating the index
314     * directory.
315     *
316     * @return a <code>File</code> representing "config.dat".
317     *
318     * @exception IOException if an error occurs.
319     */
320    public static File makeConfigFile(File storeLoc)
321        throws IOException {
322        return new File(storeLoc, "config.dat");
323    }
324
325    /**
326     * <code>makePrimaryKeyFile</code> returns a file which represents
327     * an OBDA "key_&lt;primary namespace&gt;.key" primary key file on the
328     * specified index directory.
329     *
330     * @param storeLoc a <code>File</code> indicating the parent path.
331     * @param key a <code>String</code> primary key namespace.
332     *
333     * @return a <code>File</code> representing a "key_&lt;primary
334     * namespace&gt;.key".
335     *
336     * @exception IOException if an error occurs.
337     */
338    public static File makePrimaryKeyFile(File storeLoc, String key)
339        throws IOException {
340        return new File(storeLoc, "key_" + key + ".key");
341    }
342
343    /**
344     * <code>makeSecondaryFile</code> returns a file which represents
345     * an OBDA "id_&lt;secondary namespace&gt;.index" secondary key file on
346     * the specified.
347     *
348     * @param storeLoc a <code>File</code> indicating the parent path.
349     * @param key a <code>String</code> secondary key namespace.
350     *
351     * @return a <code>File</code> representing an "id_&lt;secondary
352     * namespace&gt;.index" file.
353     *
354     * @exception IOException if an error occurs.
355     */
356    public static File makeSecondaryFile(File storeLoc, String key)
357        throws IOException {
358        return new File(storeLoc, "id_" + key + ".index");
359    }
360
361    /**
362     * <code>calculatePrimRecLen</code> calculates the byte length of
363     * primary namespace records.
364     *
365     * @param idLen an <code>int</code> the number of bytes required
366     * to hold the primary namespace ID.
367     *
368     * @return an <code>int</code> record length in bytes.
369     */
370    public static int calculatePrimRecLen(int idLen) {
371        return
372            idLen +                                     // space for ids
373            "\t".length() +
374            4 +                                         // file id
375            "\t".length() +
376            String.valueOf(Long.MAX_VALUE).length() +   // offset
377            "\t".length() +
378            String.valueOf(Integer.MAX_VALUE).length(); // length
379    }
380
381    /**
382     * <code>calculateSecRecLen</code> calculates the byte length of
383     * secondary namespace records.
384     *
385     * @param idLen an <code>int</code> the number of bytes required
386     * to hold the secondary namespace ID.
387     *
388     * @param primaryKey a <code>String</code> the primary namespace
389     * ID.
390     * @param keys a <code>Map</code> of secondary keys to their byte
391     * lengths.
392     *
393     * @return an <code>int</code> record length in bytes.
394     */
395    public static int calculateSecRecLen(int idLen, String primaryKey, Map keys) {
396        int primLength = ((Integer) keys.get(primaryKey)).intValue();
397        return
398            idLen +
399            "\t".length() +
400            primLength;
401    }
402}