/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
021
022package org.biojava.bio.program.indexdb;
023
024import java.io.File;
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Map;
033import java.util.StringTokenizer;
034
035import org.biojava.bio.Annotation;
036import org.biojava.bio.BioException;
037import org.biojava.utils.ChangeVetoException;
038import org.biojava.utils.CommitFailure;
039import org.biojava.utils.SmallMap;
040import org.biojava.utils.io.RAF;
041
042/**
043 * <code>BioStore</code>s represent directory and file structures
044 * which index flat files according to the OBDA specification. The
045 * preferred method of constructing new instances is to use
046 * <code>BioStoreFactory</code>.
047 *
048 * @author Matthew Pocock
049 * @author Keith James
050 */
051public class BioStore implements IndexStore {
052
053    /**
054     * <code>STRING_CASE_SENSITIVE_ORDER</code> compares two
055     * <code>Object</code>s, which must both be <code>String</code>s,
056     * lexicographically using <code>compareTo</code>. The comparison
057     * is carried out 'a' to 'b'.
058     */
059    static Comparator STRING_CASE_SENSITIVE_ORDER = new Comparator() {
060            public int compare(Object a, Object b) {
061                return ((Comparable) a).compareTo(b);
062            }
063        };
064    
065    private ConfigFile metaData;
066    private File location;
067    private String primaryKey;
068    private Map idToList;
069    private RAF[] fileIDToRAF;
070    private SearchableList primaryList;
071    private int fileCount;
072
073    /**
074     * Creates a new <code>BioStore</code> flatfile index at the
075     * specified location with the specified caching behaviour.
076     *
077     * @param location a <code>File</code> indicating the index
078     * directory.
079     * @param cache a <code>boolean</code> indicating whether the
080     * implementation should cache its state.
081     *
082     * @exception IOException if an error occurs.
083     * @exception BioException if an error occurs.
084     */
085    public BioStore(File location, boolean cache)
086        throws IOException, BioException {
087        this(location, cache, false);
088    }
089    
090    BioStore(File location, boolean cache, boolean mutable)
091        throws IOException, BioException {
092        this.location = location;
093
094        File configFile = BioStoreFactory.makeConfigFile(location);
095        if (!configFile.exists()) {
096            throw new BioException("Config file does not exist: "
097                                   + configFile);
098        }
099        metaData = new ConfigFile(BioStoreFactory.makeConfigFile(location));
100        idToList = new SmallMap();
101
102        primaryKey = (String) metaData.getProperty(BioStoreFactory.PRIMARY_KEY_NAME);
103        String keyList = (String) metaData.getProperty(BioStoreFactory.KEYS);
104
105        File plFile = BioStoreFactory.makePrimaryKeyFile(location, primaryKey);
106        if (cache) {
107            primaryList = new CacheList(new PrimaryIDList(plFile, this, mutable));
108        } else {
109            primaryList = new PrimaryIDList(plFile, this, mutable);
110        }
111
112        StringTokenizer sTok = new StringTokenizer(keyList, "\t");
113        while (sTok.hasMoreTokens()) {
114            String k = sTok.nextToken();
115
116            File file = BioStoreFactory.makeSecondaryFile(location, k);
117            if (cache) {
118                idToList.put(k, new CacheList(new SecondaryFileAsList(file, mutable)));
119            } else {
120                idToList.put(k, new SecondaryFileAsList(file, mutable));
121            }
122        }
123
124        readFileIDs();
125    }
126    
127    /**
128     * The name of this store or null if the name has not been set.
129     */
130    public String getName() {
131      if (metaData.containsProperty(BioStoreFactory.STORE_NAME)) {
132        return (String) metaData.getProperty(BioStoreFactory.STORE_NAME);
133      } else {
134        return null;
135      }
136    }
137    
138    /**
139     * <code>getLocation</code> returns the directory where the index
140     * is located.
141     *
142     * @return a <code>File</code>.
143     */
144    public File getLocation() {
145      return location;
146    }
147
148    private void readFileIDs()
149        throws
150            IOException,
151            BioException
152    {
153        fileIDToRAF = new RAF[5];
154        fileCount = 0;
155
156        for (Iterator i = metaData.keys().iterator(); i.hasNext(); ) {
157            String key = (String) i.next();
158            if (key.startsWith("fileid_")) {
159                int indx = Integer.parseInt(key.substring("fileid_".length()));
160                String fileLine = (String) metaData.getProperty(key);
161                int tab = fileLine.indexOf("\t");
162                File file = new File(fileLine.substring(0, tab));
163                RAF raf = new RAF(file, "r");
164                long length = Long.parseLong(fileLine.substring(tab+1));
165
166                if (file.length() != length) {
167                    throw new BioException("File changed length: " + file);
168                }
169                
170                if (indx >= fileCount) {
171                    // beyond end
172                    if (indx >= fileIDToRAF.length) {
173                        // beyond array end
174                        RAF[] tmpr = new RAF[indx+1];
175                        System.arraycopy(fileIDToRAF, 0, tmpr, 0, fileIDToRAF.length);
176                        fileIDToRAF = tmpr;
177                    }
178
179                    fileCount = indx;
180                }
181                fileIDToRAF[indx] = raf;
182            }
183        }
184    }
185
186    private void writeFileIDs()
187        throws BioException, IOException, ChangeVetoException {
188        for (int i = 0; i < fileCount; i++) {
189            RAF file = fileIDToRAF[i];
190            long length = file.length();
191            String prop = "fileid_" + i;
192            String val = file.getFile().toString() + "\t" + length;
193            metaData.setProperty(prop, val);
194        }
195    }
196
197    RAF getFileForID(int fileId) {
198        return fileIDToRAF[fileId];
199    }
200
201    int getIDForFile(RAF file)
202        throws IOException {
203        // scan list
204        for (int i = 0; i < fileCount; i++) {
205            if (file.equals(fileIDToRAF[i])) {
206                return i;
207            }
208        }
209
210        // extend fileIDToFile array
211        if (fileCount >= fileIDToRAF.length) {
212            RAF[] tmpr = new RAF[fileIDToRAF.length + 4]; // 4 is magic number
213            System.arraycopy(fileIDToRAF, 0, tmpr, 0, fileCount);
214            fileIDToRAF = tmpr;
215        }
216
217        // add the unseen file to the list
218        fileIDToRAF[fileCount] = file;
219        return fileCount++;
220    }
221
222    public Annotation getMetaData() {
223        return metaData;
224    }
225
226    public Record get(String id) {
227        return (Record) primaryList.search(id);
228    }
229
230    public List get(String id, String namespace)
231        throws BioException {
232        List hits = new ArrayList();
233        if (namespace.equals(primaryKey)) {
234            hits.add(primaryList.search(id));
235        } else {
236            SecondaryFileAsList secList = (SecondaryFileAsList) idToList.get(namespace);
237            List kpList = secList.searchAll(id);
238            for (Iterator i = kpList.iterator(); i.hasNext(); ) {
239                KeyPair keyPair = (KeyPair) i.next();
240                hits.add(primaryList.search(keyPair.getSecondary()));
241            }
242        }
243
244        return hits;
245    }
246
247    public void writeRecord(RAF file,
248                            long offset,
249                            int length,
250                            String id,
251                            Map secIDs) {
252        primaryList.add(new Record.Impl(id, file, offset, length));
253        if (!secIDs.isEmpty()) {
254            for (Iterator mei = secIDs.entrySet().iterator(); mei.hasNext(); ) {
255                Map.Entry me = (Map.Entry) mei.next();
256                String sid = (String) me.getKey();
257                List sfl = (List) idToList.get(sid);
258                Collection svals = (Collection) me.getValue();
259                for (Iterator i = svals.iterator(); i.hasNext(); ) {
260                    String sval = (String) i.next();
261                    sfl.add(new KeyPair.Impl(sval, id));
262                }
263            }
264        }
265    }
266
267    /**
268     * <code>getRecordList</code> returns all the <code>Record</code>s
269     * in the index.
270     *
271     * @return a <code>List</code> of <code>Record</code>s.
272     */
273    public List getRecordList() {
274        return primaryList;
275    }
276
277    /**
278     * <code>commit</code> writes an index to disk.
279     *
280     * @exception CommitFailure if an error occurs.
281     */
282    public void commit()
283        throws CommitFailure {
284        Collections.sort(primaryList, primaryList.getComparator());
285        primaryList.commit();
286        for (Iterator i = idToList.values().iterator(); i.hasNext(); ) {
287            SearchableList fal = (SearchableList) i.next();
288            Collections.sort(fal, fal.getComparator());
289            ((SearchableList) fal).commit();
290        }
291
292        try {
293            writeFileIDs();
294        } catch (ChangeVetoException cve) {
295            throw new CommitFailure(cve);
296        } catch (IOException ioe) {
297            throw new CommitFailure(ioe);
298        } catch (BioException be) {
299          throw new CommitFailure(be);
300        }
301
302        metaData.commit();
303    }
304}