001// BibRefQuery.java
002//
003//    senger@ebi.ac.uk
004//    March 2001
005//
006
007/*
008 *                    BioJava development code
009 *
010 * This code may be freely distributed and modified under the
011 * terms of the GNU Lesser General Public Licence.  This should
012 * be distributed with the code.  If you do not have a copy,
013 * see:
014 *
015 *      http://www.gnu.org/copyleft/lesser.html
016 *
017 * Copyright for this code is held jointly by the individual
018 * authors.  These should be listed in @author doc comments.
019 *
020 * For more information on the BioJava project and its aims,
021 * or to join the biojava-l mailing list, visit the home page
022 * at:
023 *
024 *      http://www.biojava.org/
025 *
026 */
027package org.biojava.bibliography;
028
029import java.io.InputStream;
030import java.util.Enumeration;
031import java.util.Hashtable;
032
033/**
034 * <p>
035 * The interface <em>BibRefQuery</em> is a fundamental part of the Bibliographic Query
036 * Service. It allows searching for and retrieving citations from a bibliographic
037 * repository. The result of the query methods is again of type BibRefQuery, which
038 * allows the query to be refined further. When the caller is satisfied with the query
039 * results, the retrieval methods can be used to get either a list of citations (of type
040 * {@link BibRef}), or an XML document representing citations. 
041 * </p>
042 *
043 * <p>
044 * Squeezing all query and retrieval methods into one interface makes it possible to build very
045 * flexible systems, both distributed (where the client and repository parts are
046 * executed on different computers) and stand-alone (both parts are linked
047 * together into one process).
048 * </p>
049 *
050 * <p>
051 *<table border=0 cellpadding=10>
052 *<tr><td valign="top">
053 * For example, this picture shows a client linked together
054 * with a repository implementation. These two parts communicate
055 * entirely via <em>BibRefQuery</em> interface. Each of them can be replaced
056 * without changing the other one.
057 *</td><th>
058 * <img src="doc-files/BibRefQuery_simple.jpg">
059 *</th></tr>
060 *<tr><td valign="top">
061 * In this example, a client uses <em>BibRefQuery</em> interface to communicate only
062 * with a local implementation of a distributed architecture (a CORBA communication
063 * protocol in this case). The repository implementation is similarly shielded by the
064 * same interface from the communication protocol layer.
065 *</td><th>
066 * <img src="doc-files/BibRefQuery_corba.jpg">
067 *</th></tr>
068 *<tr><td valign="top">
069 * The last picture shows yet another example of a distributed
070 * architecture showing the parts which must be changed when a
071 * different communication protocol is used (the SOAP-HTTP in this
072 * case). Note that both the client and repository implementation
073 * remained untouched.
074 *</td><th>
075 * <img src="doc-files/BibRefQuery_soap.jpg" align="left">
076 *</th></tr>
077 *</table>
078 * </p>
079 *
080 * <h3>The implementation is advised to use the following constructor</h3>
081 *
082 * <p>
083 *<pre>
084 *    public <em>NameOfAnImplementation</em> (String[] args, Hashtable props) {...}
085 *</pre>
086 *    where both <tt>args</tt> and <tt>props</tt> contain implementation
087 *    specific parameters and properties. However, some properties are
088 *    more likely to be used - the suggested names for them are defined
089 *    either in this interface or in the "sister" interface {@link BibRefSupport}.
090 * </p>
091 *
092 * <p>
093 * The use of this constructor makes it easier to load different
094 * implementations dynamically.
095 * </p>
096 *
097 * <p>
098 * The methods of the <em>BibRefQuery</em> interface can be divided into three groups.
099 * The first group deals with connections to bibliographic repositories - here are
100 * methods {@link #connect connect}, {@link #disconnect disconnect}, {@link #destroy destroy},
101 * {@link #getCollectionId getCollectionId}, and {@link #isReady isReady}.
102 * </p>
103 *
104 * <p>
105 * The second and the most interesting group contains the query methods.
106 * As mentioned above, these methods (mostly) return another query collection
107 * which is again query-able. Here belong methods {@link #find find}, {@link #findByAuthor findByAuthor},
108 * {@link #findById findById}, {@link #query query}, {@link #getBibRefCount getBibRefCount}, and {@link #sort sort}.
109 * </p>
110 *
111 * <p>
112 * The last group has methods for retrieving citations from the resulting query collection.
113 * The retrieval methods can also return citations that are not fully populated with all
114 * available attribute data (for example, the long abstracts can be asked for only later).
115 * Here belong methods {@link #getAllBibRefs getAllBibRefs}, {@link #getBibRefs getBibRefs},
116 * {@link #getAllIDs getAllIDs}, {@link #getAllBibRefsAsXML getAllBibRefsAsXML},
117 * {@link #getBibRefsAsXML getBibRefsAsXML}, and {@link #getBibRefAsXML getBibRefAsXML}. 
118 * </p>
119 *
120 * <a name="how_attr_names"></a>
121 *<h3>Simple and Qualified Attribute Names</h3>
122 * <p>
123 * There are several places where method arguments represent attribute names:
124 * </p>
125 *
126 * <p>
127 *<ul>
128 *   <li> In query methods, such as {@link #find find}, a list of attributes that should be
129 *        searched.
130 *   <li> The query results are citations represented as {@link BibRef} instances
131 *        but not necessarily fully populated - they may contain only a subset of attributes,
132 *        as determined by the <em>excluded</em> attribute lists used in several methods.
133 *   <li> The results may be ordered by one or more attributes (method {@link #sort sort}).
134 *</ul>
135 * </p>
136 *
137 * <p>
138 * Therefore, this interface defines several rules on how to specify attribute names whenever
139 * they have to be expressed as strings. The existence of these rules will make the
140 * implementations interoperable. But, of course, they can be ignored if the interoperability
141 * is not an issue.
142 * </p>
143 *
144 * <p>
145 * The following rules define how to create stringified names for individual attributes.
146 *<ol>
147 *  <li> The best recommended practice is to find attribute names from a controlled
148 *       vocabulary - see details in {@link BibRefSupport} interface.
149 *  <li> The stringified names of attributes of class {@link BibRef}  are equal to the
150 *       member names of this class. For example, <tt>identifier</tt>, <tt>type</tt>,
151 *       <tt>title</tt>, <tt>authors</tt>.
152 *  <li> The stringified names of attributes of sub-classes derived from class {@link BibRef},
153 *       and of attributes of other classes, are also equal to the member names but additionally
154 *       they must be qualified by the resource type using two underscores ( __ ).
155 *       For example,  <tt>Book__isbn</tt>,   <tt>JournalArticle__from_journal</tt>,
156 *       <tt>Journal__name</tt>.
157 *       <blockquote><em>
158 *           The somewhat unusual  double underscore  is suggested here because in some
159 *           query languages (where the stringified attribute names can be used as
160 *           variables) an underscore is the only non-alphabetic character allowed for
161 *           variables.
162 *       </em></blockquote>
163 *  <li> The qualification part of the stringified name (together with underscores) can
164 *       be omitted if there is no ambiguity. For example, if an implementation does not
165 *       use property name  <tt>isbn</tt>  anywhere else, the  <tt>Book__isbn</tt>  can
166 *       be replaced by simple <tt>isbn</tt>.
167 *       <blockquote><em>
168 *       Be aware, however, that dropping the qualifier may compromise extendibility
169 *       because a caller that expects a unique attribute name may break if another
170 *       citation type is added with the same attribute name.
171 *       </em></blockquote>
172 *  <li> The stringified names of the attributes from {@link BibRef#properties dynamic properties}
173 *       are equal to their property names, applying the rule about qualification as defined above.
174 *       Thus, for example, an attribute  <tt>registry_number</tt>  hidden in member
175 *       {@link BibRef#properties} will be stringified simply as <tt>registry_number</tt>,
176 *       and an attribute  <tt>location</tt>  hidden in properties of a sub-class
177 *       representing books will be stringified as  <tt>Book__location</tt>.
178 *  <li> The stringified names of the attributes from {@link BibRef#properties dynamic properties}
179 *       for instances without their own sub-class must be qualified (as described above) by
180 *       the contents of their {@link BibRef#type}.  For example, a citation can be of type
181 *       <tt>letter</tt>,  but there is no sub-class  <tt>Letter</tt>. Therefore, an attribute
182 *       <tt>type</tt>  has value  <tt>letter</tt>.  This value is then used to create a qualified
183 *       stringified name  <tt>letter__subject</tt>.
184 *  <li> The stringified names should be considered case-insensitive. Thus,  <tt>book__location</tt>
185 *       is the same as  <tt>Book__location</tt>,  and  <tt>journalarticle__issue</tt>  is equal to
186 *       <tt>JournalArticle__issue</tt>.
187 *</ol>
188 * </p>
189 *
190 *<a name="how_criteria"></a>
191 *<h3>Query Matching and Ordering Criteria</h3>
192 * <p>
193 * Several methods dealing with queries and sorting use a list of criteria.
194 * The criteria define how the matching or ordering should be done.
195 * </p>
196 *
197 * <p>
198 * Each criterion is fully defined by an instance of {@link BiblioCriterion}.
199 * Such definitions can be obtained from a controlled vocabulary - see
200 * {@link BibRefSupport#getSupportedCriteria()}.
201 * </p>
202 *
203 * <p>
204 * Because each criterion is uniquely identifiable by its name, the querying and
205 * sorting methods use only lists of criteria names, not lists of full criteria
206 * definitions. 
207 * </p>
208 *
209 *<a name="how_excluded"></a>
210 *<h3>Excluded and Only-included attributes</h3>
211 * <p>
212 * Several methods use a parameter with <em>excluded</em> attributes, or a
213 * parameter with <em>only-included</em> attributes.
214 * There are two different meanings and uses of such attribute lists.
215 * </p>
216 *
217 * <p>
218 * The first meaning is used by the query methods. They return a new query collection.
219 * For practical and performance reasons it may sometimes be useful to define
220 * <b>in advance</b> that the citations representing the resulting query collection
221 * do not need to contain all attributes. The <em>excluded</em> list of attribute
222 * names defines what attributes are not needed - typical use is to exclude
223 * abstracts which may be quite long.
224 * Thus, using the <em>excluded</em> list in the query method means that the
225 * resulting query collection will never have all attributes fully filled with data
226 * (unless, of course, the implementation ignores the <em>excluded</em> list).
227 * </p>
228 *
229 * <p>
230 * The second meaning is for the retrieval methods. They return citations from
231 * a current query collection and can decide that only some attributes in the returned
232 * citations are filled with data (such parameter list is always named <em>onlyAttrs</em>).
233 * It may again mean that less data will be transferred
234 * but it is a <b>post-act</b> decision because the query collection already has all
235 * the data and merely does not return them now; the next retrieval method (on the
236 * same collection) can still retrieve them.
237 * </p>
238 *
239 * <p>
240 * Both uses may be applied in different scenarios, and their efficiency is
241 * very dependent on the repository implementation. Sometimes the creation of a
242 * query collection already includes heavy data manipulation - therefore, the
243 * first usage may help with performance. But sometimes the resulting query
244 * collection is more or less a virtual collection and the real data transfer
245 * is applied only when the citations are being retrieved. In this case, the latter
246 * scenario may be more efficient.
247 * </p>
248 *
249 * @author <A HREF="mailto:senger@ebi.ac.uk">Martin Senger</A>
250 * @author Matthew Pocock
251 * @version $Id$
252 * @see BibRef
253 * @see BibRefSupport
254 * @see BiblioCriterion
255 * @since 1.3
256 */
257
258public interface BibRefQuery {
259
260    /**
261     * <p>
262     * A property name specifying a list of excluded attribute names
263     * (the type of the property value should be <tt>String[]</tt>).
264     * </p>
265     *
266     * <p>
267     * The list defines the attributes which are not returned in the
268     * resulting citations (see the discussion on
269     * <a href="#how_excluded">excluded attributes</a>).
270     * </p>
271     * @see #findByAuthor findByAuthor
272     * @see #find find
273     * @see #query query
274     */
275    static final String PROP_EXCLUDED_ATTRS = "excluded";
276
277    /**
278     * A property name specifying a list of searching and ordering criteria
279     * names (the type of the property value should be <tt>String[]</tt>). See
280     * the discussion on <a href="#how_criteria">criteria</a>. The same property
281     * is also accepted by {@link #sort sort}.
282     * @see #find find
283     * @see #query query
284     */
285    static final String PROP_CRITERIONS     = "criterions";
286
287    /**************************************************************************
288     * <p>
289     * It returns an identification of the current query collection.
290     * </p>
291     *
292     * <p>
293     * At the beginning, the identification usually contains a bibliographic
294     * repository name or a description of its contents. But later, usually after
295     * {@link #connect()} or after the first query, the identification may contain
296     * information rich enough to be able to re-create the whole collection
297     * (e.g. it can contain IDs of all records in the given collection).
298     * </p>
299     *
300     * <p>
301     * An implementation is not required to provide a persistent collection
302     * identification. However, if it does provide one, it should also be
303     * able to accept the same identifier in the {@link #connect(byte[]) connect}
304     * method, and to use it to re-create the same collection.
305     * </p>
306     *
307     * @return an identification of the current collection (may be null)
308     *************************************************************************/
309    byte[] getCollectionId();
310
311    /**************************************************************************
312     * <p>
313     * It creates a connection to a bibliographic repository, and/or it makes
314     * all necessary initialization steps needed for further communication.
315     * </p>
316     *
317     * <p>
318     * However, there should be no need to call this method explicitly;
319     * the other methods should do it automatically before they need something
320     * from the repository.
321     * </p>
322     *
323     * @throws BibRefException if the connection cannot be established
324     *************************************************************************/
325    void connect()
326        throws BibRefException;
327
328    /**************************************************************************
329     * <p>
330     * It creates a connection to a bibliographic repository, and/or it makes
331     * all necessary initialization steps needed for further communication,
332     * and it makes the collection described by <tt>collectionId</tt>
333     * the current collection.
334     * </p>
335     *
336     * @see #connect() connect without parameters
337     * @param collectionId a (usually persistent) token that allows a collection
338     *        to be re-created; the parameter is the same as an identifier returned
339     *        earlier by method {@link #getCollectionId}
340     * @throws BibRefException if the connection cannot be established, or if the
341     *        collection with the given ID cannot be re-created
342     *************************************************************************/
343    void connect (byte[] collectionId)
344        throws BibRefException;
345
346    /**************************************************************************
347     * It checks if the repository is available. The semantics of
348     * <em>available</em> depend on the implementation.
349     *
350     * @return true if the repository is ready
351     *************************************************************************/
352    boolean isReady();
353
354    /**************************************************************************
355     * <p>
356     * It disconnects from the repository.
357     * </p>
358     *
359     * <p>
360     * The caller can use this method to announce that the current query
361     * collection will not be needed soon. However, it may still be possible
362     * to connect to it again later.
363     * </p>
364     *
365     * @see #destroy destroy for more permanent action
366     *************************************************************************/
367    void disconnect();
368
369    /*************************************************************************
370     * <p>
371     * It frees all resources related to this query collection.
372     * </p>
373     *
374     * <p>
375     * The caller explicitly announces that it has no further interest in
376     * the current query collection. The existence of two separate
377     * methods, {@link #disconnect} and <tt>destroy</tt>, allows more flexibility
378     * for cases where an implementation deals with, for example,
379     * temporary repositories.
380     * </p>
381     *
382     * @see #disconnect disconnect for less permanent action
383     * @throws BibRefException if the connection to the repository is broken
384     *************************************************************************/
385    void destroy()
386        throws BibRefException;
387
388    /*************************************************************************
389     * <p>
390     * The easiest direct method for querying a repository.
391     * </p>
392     *
393     * <p>
394     * It is modeled on examples of web-based searches: A caller can specify
395     * virtually anything in the list of <tt>keywords</tt> and the implementation
396     * tries to search for these in as many attributes as possible and reasonable,
397     * applying logical AND between them. However, a caller can also specifically
398     * limit the search only to attributes specified in the <tt>attrs</tt> list.
399     * </p>
400     *
401     * <p>
402     * <em>Note that there is no real query language used by this method,
403     * therefore, this method is not suitable for queries requiring
404     * logical operators (other than AND).</em>
405     * </p>
406     *
407     * <p>
408     * The query result can be influenced by the additional properties:
409     * <ul>
410     *  <li> Property {@link #PROP_EXCLUDED_ATTRS} is of type <tt>String[]</tt>
411     *       and contains a list of attribute names which should not be
412     *       included in the resulting query collection. See discussions on
413     *       <a href="#how_excluded">excluded attributes</a> and on
414     *       <a href="#how_attr_names">stringified attribute names</a>.
415     *  <li> Property {@link #PROP_CRITERIONS} is also of type <tt>String[]</tt>
416     *       and contains a list of criteria names. The caller specifies here
417     *       what criteria she wishes, and this method can change this property
418     *       and return here the criteria really used for the query.
419     *       See also discussion about <a href="#how_criteria">criteria</a>.
420     * </ul>
421     * </p>
422     *
423     * @param keywords keywords or phrases that are being looked for
424     * @param attrs attribute names that should be searched; if this list is
425     *              empty the implementation should search all reasonable
426     *              attributes
427     * @param properties specify attributes excluded from the results and
428     *                   requested criteria for the query
429     * @return a new query (and query-able) collection
430     * @throws BibRefException if query failed (which can have many reasons :-))
431     *         (note that an empty result does not cause an exception)
432     *************************************************************************/
433    BibRefQuery find (String[] keywords, String[] attrs, Hashtable properties)
434        throws BibRefException;
435
436    /*************************************************************************
437     * <p>
438     * This is a convenience method for a common query.
439     * </p>
440     *
441     * <p>
442     * The search is done only for attributes having non-empty values in
443     * parameter <tt>author</tt>. For example, a search for citations written
444     * by authors with surname <tt>Doe</tt> can be specified by sending an
445     * instance of <tt>BiblioPerson</tt> with <tt>surname</tt> filled with
446     * <tt>Doe</tt> and with other attributes empty. Or, a search for
447     * institution <tt>EBI</tt> can be specified by sending an instance of
448     * <tt>BiblioOrganisation</tt> with <tt>name</tt> containing <tt>EBI</tt>.
449     * </p>
450     *
451     * <p>
452     * The query result can be influenced by the additional properties:
453     * <ul>
454     *  <li> Property {@link #PROP_EXCLUDED_ATTRS} is of type <tt>String[]</tt>
455     *       and contains a list of attribute names which should not be
456     *       included in the resulting query collection. See discussions on
457     *       <a href="#how_excluded">excluded attributes</a> and on
458     *       <a href="#how_attr_names">stringified attribute names</a>.
459     *  <li> Property {@link #PROP_CRITERIONS} is also of type <tt>String[]</tt>
460     *       and contains a list of criteria names. The caller specifies here
461     *       what criteria she wishes, and this method can change this property
462     *       and return here the criteria really used for the query.
463     *       See also discussion about <a href="#how_criteria">criteria</a>.
464     * </ul>
465     * </p>
466     *
467     * @see #find find
468     * @see BiblioPerson
469     * @see BiblioOrganisation
470     * @see BiblioService
471     * @param author contains one or more attributes that are being searched for
472     * @param properties specify attributes excluded from the results and
473     *                   requested criteria for the query
474     * @return a new query (and query-able) collection
475     * @throws BibRefException if query failed (which can have many reasons :-))
476     *         (note that an empty result does not cause an exception)
477     *************************************************************************/
478    BibRefQuery findByAuthor (BiblioProvider author, Hashtable properties)
479        throws BibRefException;
480
481    /*************************************************************************
482     * <p>
483     * This is a convenience method returning just one citation.
484     * </p>
485     *
486     * <p>
487     * It queries the current collection in order to find and to retrieve
488     * a citation with the given identifier. It depends on the implementation
489     * what can be used as an identifier - see {@link BibRef#identifier}.
490     * </p>
491     *
492     * @see #findById(String,String[]) findById with limited returned attributes
493     * @param bibRefId an identifier of a citation that is being looked for
494     * @return the bibliographic reference (citation) that was found
495     * @throws BibRefException if such a citation was not found (or something else
496     *                     bad happened)
497     *************************************************************************/
498    BibRef findById (String bibRefId)
499        throws BibRefException;
500
501    /*************************************************************************
502     * <p>
503     * This is a convenience method returning just one citation, perhaps with
504     * a limited number of attributes.
505     * </p>
506     *
507     * <p>
508     * It queries the current collection in order to find and to retrieve
509     * a citation with the given identifier. It depends on the implementation
510     * what can be used as an identifier - see {@link BibRef#identifier}.
511     * </p>
512     *
513     * <p>
514     * The returned citation will contain at least attributes whose names are
515     * specified by the parameter <tt>onlyAttrs</tt> (see discussion on
516     * <a href="#how_excluded">only-included attributes</a>).
517     * </p>
518     *
519     * <p>
520     * It is meant to provide a more lightweight citation. The
521     * implementation may provide more attributes than specified in
522     * <tt>onlyAttrs</tt> (e.g. it is always recommended to include an
523     * attribute representing a unique identifier of the citation even
524     * if it is not asked for).
525     * </p>
526     *
527     * <p>
528     * Note that one can ask only for attributes that are available in the
529     * current collection. If the collection was already created
530     * <em>without</em> some attributes (using property
531     * {@link #PROP_EXCLUDED_ATTRS}, e.g. in method {@link #find find}) one cannot
532     * expect to get them even if they are asked for by the parameter
533     * <tt>onlyAttrs</tt>.
534     * </p>
535     *
536     * @see #findById(String) findById
537     * @param bibRefId an identifier of a citation that is being looked for
538     * @param onlyAttrs a list of attribute names; at least these attributes
539     *                  will be included in the returned citation
540     * @return the bibliographic reference (citation) that was found
541     * @throws BibRefException if such a citation was not found (or something else
542     *                     bad happened)
543     *************************************************************************/
544    BibRef findById (String bibRefId, String[] onlyAttrs)
545        throws BibRefException;
546
547    /*************************************************************************
548     * <p>
549     * It queries the current collection using a query language.
550     * </p>
551     *
552     * <p>
553     * Use this method when the simple {@link #find find} method is not sufficient,
554     * for example, when more logical or relational operators are needed
555     * to express the query.
556     * </p>
557     *
558     * <p>
559     * This specification does not propose any specific query language
560     * to use (but may in the future). Roughly speaking, the query
561     * method takes a <tt>query</tt> string and passes it to the repository
562     * implementation, and if the implementation understands the query
563     * the world is saved.
564     * </p>
565     *
566     * <p>
567     * Again, the query result can be influenced by the additional properties:
568     * <ul>
569     *  <li> Property {@link #PROP_EXCLUDED_ATTRS} is of type <tt>String[]</tt>
570     *       and contains a list of attribute names which should not be
571     *       included in the resulting query collection. See discussions on
572     *       <a href="#how_excluded">excluded attributes</a> and on
573     *       <a href="#how_attr_names">stringified attribute names</a>.
574     *  <li> Property {@link #PROP_CRITERIONS} is also of type <tt>String[]</tt>
575     *       and contains a list of criteria names. The caller specifies here
576     *       what criteria she wishes, and this method can change this property
577     *       and return here the criteria really used for the query.
578     *       See also discussion about <a href="#how_criteria">criteria</a>.
579     * </ul>
580     * </p>
581     *
582     * @see #find find
583     * @param query an expression in a query language
584     * @param properties specify attributes excluded from the results and
585     *                   requested criteria for the query
586     * @return a new query (and query-able) collection
587     * @throws BibRefException if query failed (which can have many reasons :-))
588     *         (note that an empty result does not cause an exception)
589     *************************************************************************/
590    BibRefQuery query (String query, Hashtable properties)
591        throws BibRefException;
592
593    /*************************************************************************
594     * <p>
595     * It returns the number of citations in the current collection.
596     * </p>
597     *
598     * @return the size of this collection
599     * @throws BibRefException if the connection to the repository is broken
600     *************************************************************************/
601    int getBibRefCount()
602        throws BibRefException;
603
604    /*************************************************************************
605     * <p>
606     * It sorts the current collection and returns another collection which is
607     * a sorted copy of the current collection.
608     * </p>
609     *
610     * <p>
611     * Strictly speaking, this is not a query method, but it also returns
612     * a query collection.
613     * </p>
614     *
615     * <p>
616     * The sorting result can be influenced by an additional property
617     * {@link #PROP_CRITERIONS} (of type <tt>String[]</tt>) containing
618     * a list of sorting criteria names. The caller specifies here
619     * what criteria she wishes, and this method can change this property
620     * and return here the criteria really used for sorting.
621     * </p>
622     *
623     * @param orderedBy a list of attribute names that the collection should
624     *                  be sorted by
625     * @param properties may contain {@link #PROP_CRITERIONS} with sorting criteria names
626     * @return a sorted collection
627     * @throws BibRefException if sorting failed (which may also happen when
628     *         the collection is too large)
629     *************************************************************************/
630    BibRefQuery sort (String[] orderedBy, Hashtable properties)
631        throws BibRefException;
632
633    /*************************************************************************
634     * <p>
635     * It returns all citations from the current collection as a
636     * (possibly big) array. Obviously, the repository implementation
637     * may limit the number of returned records.
638     * </p>
639     *
640     * <p>
641     * Some attributes may be missing (empty) if the property
642     * {@link #PROP_EXCLUDED_ATTRS} was used for creating the current
643     * collection. See the discussion on
644     * <a href="#how_excluded">excluded attributes</a>.
645     * </p>
646     *
647     * @see #getAllBibRefs(String[]) getAllBibRefs with limited returned attributes
648     * @return all citations from the current collection
649     * @throws BibRefException if the collection is too large, or if the connection
650     *        to the repository is broken
651     *************************************************************************/
652    BibRef[] getAllBibRefs()
653        throws BibRefException;
654
655    /*************************************************************************
656     * <p>
657     * It returns all citations from the current collection as a
658     * (possibly big) array, perhaps with a limited number of attributes.
659     * </p>
660     *
661     * <p>
662     * The returned citations will contain at least attributes whose names are
663     * specified by the parameter <tt>onlyAttrs</tt>. It is meant to provide
664     * more lightweight citations. The implementation may provide more
665     * attributes than specified in <tt>onlyAttrs</tt> (e.g. it may be always
666     * good to include an attribute representing a unique identifier of a
667     * citation even if it is not asked for). See the discussion on
668     * <a href="#how_excluded">only-included attributes</a>.
669     * </p>
670     *
671     * <p>
672     * Note that one can ask only for attributes that are available in the
673     * current collection. If the collection was already created
674     * <em>without</em> some attributes (using property
675     * {@link #PROP_EXCLUDED_ATTRS}, e.g. in method {@link #find find}) one
676     * cannot expect to get them even if they are asked for by the parameter
677     * <tt>onlyAttrs</tt>.
678     * </p>
679     *
680     * @see #getAllBibRefs() getAllBibRefs with all attributes
681     * @see #getAllIDs getAllIDs
682     *
683     * @param onlyAttrs names of the attributes to attempt to include
684     * @return all citations from the current collection
685     * @throws BibRefException if the collection is too large, or if the connection
686     *        to the repository is broken
687     *************************************************************************/
688    BibRef[] getAllBibRefs (String[] onlyAttrs)
689        throws BibRefException;
690
691    /*************************************************************************
692     * <p>
693     * A convenience method returning just the identifiers of all current citations.
694     * </p>
695     *
696     * @return an array of all identifiers
697     * @throws BibRefException if the collection is too large, or if the connection
698     *        to the repository is broken
699     *************************************************************************/
700    String[] getAllIDs()
701        throws BibRefException;
702
703    /*************************************************************************
704     * <p>
705     * Returns an enumeration of all citations from the current collection.
706     * The type of the elements in the enumeration is {@link BibRef} (or one of
707     * its sub-classes).
708     * </p>
709     *
710     * <p>
711     * Some attributes may be missing (empty) if the property
712     * {@link #PROP_EXCLUDED_ATTRS} was used when creating the current
713     * collection.
714     * </p>
715     *
716     * @see #getAllBibRefs() getAllBibRefs
717     * @return an enumeration of all citations
718     * @throws BibRefException if the connection to the repository is broken
719     *************************************************************************/
720    Enumeration getBibRefs()
721        throws BibRefException;
722
723    /*************************************************************************
724     * <p>
725     * Returns an enumeration of all citations from the current collection,
726     * perhaps with a limited number of attributes.
727     * The type of the elements in the enumeration is {@link BibRef} (or one of
728     * its sub-classes).
729     * </p>
730     *
731     * <p>
732     * The citations available through the enumeration will contain at least the
733     * attributes whose names are specified by the parameter <tt>onlyAttrs</tt>.
734     * This is meant to provide more lightweight citations. The implementation
735     * may provide more attributes than specified in <tt>onlyAttrs</tt> (e.g.
736     * it may always be good to include an attribute representing a unique
737     * identifier of a citation even if it is not asked for).
738     * </p>
739     *
740     * <p>
741     * Note that one can ask only for attributes that are available in the
742     * current collection. If the collection was already created
743     * <em>without</em> some attributes (using the property
744     * {@link #PROP_EXCLUDED_ATTRS}, e.g. in method {@link #find find}), one cannot
745     * expect to get them even if they are asked for by the parameter
746     * <tt>onlyAttrs</tt>.
747     * </p>
748     *
749     * @see #getAllBibRefs(String[]) getAllBibRefs
750     * @see #getBibRefs() getBibRefs with all attributes
751     *
752     * @param onlyAttrs names of the attributes to attempt to fetch
753     * @return an enumeration of all citations
754     * @throws BibRefException if the connection to the repository is broken
755     *************************************************************************/
756    Enumeration getBibRefs (String[] onlyAttrs)
757        throws BibRefException;
758
759    /*************************************************************************
760     * <p>
761     * Returns all citations from the current collection as an XML stream.
762     * The content of such an XML stream is highly repository-dependent.
763     * </p>
764     *
765     * <p>
766     * Some attributes may be missing (empty) if the property
767     * {@link #PROP_EXCLUDED_ATTRS} was used when creating the current
768     * collection.
769     * </p>
770     *
771     * @see #getAllBibRefs() getAllBibRefs
772     * @return an XML data stream containing all citations from the current
773     *         collection
774     * @throws BibRefException if the collection is too large, or if the connection
775     *        to the repository is broken
776     *************************************************************************/
777    InputStream getAllBibRefsAsXML()
778        throws BibRefException;
779
780    /*************************************************************************
781     * <p>
782     * Returns an enumeration of all citations from the current collection.
783     * The type of the elements in the enumeration is <tt>String</tt>.
784     * Each element represents one citation as an XML string.
785     * The content of such an XML string is highly repository-dependent.
786     * </p>
787     *
788     * <p>
789     * Some attributes may be missing (empty) if the property
790     * {@link #PROP_EXCLUDED_ATTRS} was used when creating the current
791     * collection.
792     * </p>
793     *
794     * @see #getBibRefs() getBibRefs
795     * @see #getAllBibRefsAsXML() getAllBibRefsAsXML
796     * @return an enumeration of XML strings, one per citation
797     * @throws BibRefException if the connection to the repository is broken
798     *************************************************************************/
799    Enumeration getBibRefsAsXML()
800        throws BibRefException;
801
802    /*************************************************************************
803     * <p>
804     * A convenience utility method converting a given citation to its
805     * XML representation. It is useful, for example, in cases when a
806     * program annotates citations on-the-fly and needs them in the
807     * same XML format.
808     * </p>
809     *
810     * <p>
811     * The XML format depends on the repository where the citation comes from.
812     * </p>
813     *
814     * @param bibRef the citation to be converted into an XML format
815     * @return an XML representation of <tt>bibRef</tt>
816     * @throws BibRefException for implementation-specific reasons (none are defined here)
817     *************************************************************************/
818    String getBibRefAsXML (BibRef bibRef)
819        throws BibRefException;
820
821
822}