001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.db;
023
024import java.util.Set;
025
026/**
027 * <p>
028 * A SequenceDBInstallation has the functionality of a factory for
029 * SequenceDB objects and additionally manages the SequenceDB objects
030 * created by itself such that the minimum number of SequenceDB
031 * objects is created by a particular SequenceDBInstallation
032 * object.
033 * </p>
034 *
035 * <p>
036 * The idea behind this interface is that sequence databases are
037 * usually installed in groups. E.g., there might be a directory which
038 * contains FASTA-formated sequence files for EMBL and SwissProt; or
039 * there might be an SRS-installation that provides access to GenBank
040 * and SwissProt; or there might be a relational database that can be
041 * queried for GenBank, PIR and SwissProt entries. These 3 cases would
042 * be represented through 3 distinct SequenceDBInstallation
043 * objects. Each of these objects can be queried for the set of
044 * SequenceDB objects it supports, or a particular SequenceDB object
045 * can be retrieved from a SequenceDBInstallation object through a
046 * string identifier.  All SequenceDB objects that belong to a
047 * particular SequenceDBInstallation share the same way of retrieving
048 * sequences and will hence be constructed and configured in a very
049 * similar fashion - which is the primary reason for inventing the
050 * SequenceDBInstallation object which can act as a factory for
051 * SequenceDB objects.
052 * </p>
053 *
054 * <p>
055 * A SequenceDBInstallation object also manages the SequenceDB
056 * objects it has created so that requests for the same database (say
057 * SwissProt) will always return the same SequenceDB object. This
058 * becomes particularly important when SequenceDB objects allow the
059 * modification (create/update/delete of Sequence entries) of the
060 * underlying sequence database and this sequence "database" is not
061 * transactional in itself (such as a FASTA file). Because in this
062 * case the SequenceDB object must act as a transactional front-end to
063 * the sequence database and there should really be only one
064 * SequenceDB object for each sequence database - which is ensured by
065 * SequenceDBInstallation.
066 * </p>
067 *
068 * @author <a href="mailto:Gerald.Loeffler@vienna.at">Gerald
069 * Loeffler</a> for the <a href="http://www.imp.univie.ac.at">IMP</a>
070 */
071public interface SequenceDBInstallation {
072    /**
073     * Return all sequence dbs available in this sequence db
074     * installation. This is not just the set of sequence dbs already
075     * returned by getSequenceDB() but the entire set of sequence dbs
076     * supported by this object.
077     *
078     * @return a set of SequenceDB objects which may be empty. An
079     * implementation may also return null if it is not at all possible
080     * to determine which sequence dbs are part of this installation.
081     */
082    public Set getSequenceDBs();
083
084    /**
085     * <p>
086     * Return the SequenceDB for the given identifier. The identifier
087     * can (but need not) be the name of the sequence db.  An
088     * implementation may support any number of identifiers to
089     * (uniquely) identify a particular sequence db - but the name of
090     * the sequence db (returned by SequenceDB.getName()) must always be
091     * among them.
092     * </p>
093     *
094     * <p>
095     * If the sequence db identified by the given identifier has not
096     * been requested through this object, it will be created and
097     * returned (hence this method is a factory method). If the sequence
098     * db identified by the given identifier has already been requested,
099     * the same object is returned.
100     * </p>
101     *
102     * @param identifier the string that identifies the sequence db. May
103     * not be null.
104     *
105     * @return the SequenceDB object that matches the given identifier
106     * or null if no such SequenceDB object could be found. (It is the
107     * responsibility of the implementation to take care that all
108     * identifiers are unique so if it turns out that the given
109     * identifier identifies more than one sequence db, this method
110     * should throw a RuntimeException.)
111     */
112    public SequenceDBLite getSequenceDB(String identifier);
113
114    /**
115     * <code>addSequenceDB</code> adds a new <code>SequenceDB</code>
116     * under its own identifier which will additionally be recognised
117     * by the set of other identifiers. It is up to the implementation
118     * as to how conflicting identifiers are handled.
119     *
120     * @param sequenceDB a <code>SequenceDB</code>.
121     * @param otherIdentifiers a <code>Set</code>.
122     */
123    public void addSequenceDB(SequenceDBLite sequenceDB, Set otherIdentifiers);
124}