001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.tagvalue;
023
024import java.io.File;
025import java.io.FileNotFoundException;
026import java.io.FileReader;
027import java.util.ArrayList;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031
032import org.biojava.bio.program.indexdb.IndexStore;
033import org.biojava.utils.ParserException;
034import org.biojava.utils.SmallMap;
035import org.biojava.utils.io.CountedBufferedReader;
036import org.biojava.utils.io.RAF;
037
038/**
039 * <p>
040 * Listens to tag-value events and passes on indexing events to an IndexStore.
041 * </p>
042 *
043 * <p>
044 * This class is provided to allow the indexing of arbitrary record-based text
045 * files. Indexer objects are built for a single file and the indexes are
046 * written to a single index store. To keep all of the reader offsets in sync
047 * with one another, you will almost certainly wish to use the getReader()
048 * method to retrieve a CountedBufferedReader instance if you want to read the
049 * byte-offset between calls to Parser.read(). Below is an example of how to
050 * index a file.
051 * </p>
052 *
053 * <p><em>Note:</em> It is very important to configure the BioStoreFactory
054 * instance with all the right keys before hand.</p>
055 *
056 * <pre>
057 * File fileToIndex; // get this from somewhere
058 * BioStore store = bsf.createBioStore();
059 * Indexer indexer = new Indexer(fileToIndex, store);
060 * indexer.setPrimaryKeyName("foo");
061 * indexer.addSecondaryKey("bar");
062 * indexer.addSecondaryKey("baz");
063 *
064 * TagValueParser tvParser; // make this appropriate for your format
065 * TagValueListener listener; // make this appropriate for your format
066 *                            // and forward all events to changer
067 * 
068 * Parser parser = new Parser();
069 * while(
070 *   parser.read(indexer.getReader(), tvParser, listener)
071 * ) {
072 *   System.out.print(".");
073 * }
074 * </pre>
075 *
076 * @since 1.2
077 * @author Matthew Pocock
078 */
079public class Indexer
080implements TagValueListener {
081  private final RAF file;
082  private final CountedBufferedReader reader;
083  private final IndexStore indexStore;
084  private final Map seccondaryKeys;
085  private String primaryKeyName;
086  private String primaryKey;
087  private Object tag;
088  private long offset;
089  private int depth;
090  
091  /**
092   * Build a new Indexer.
093   *
094   * @param file  the file to be processed
095   * @param indexStore  the IndexStore to write to
096   */
097  public Indexer(File file, IndexStore indexStore)
098  throws FileNotFoundException {
099    this.file = new RAF(file, "r");
100    this.reader = new CountedBufferedReader(new FileReader(file));
101    this.indexStore = indexStore;
102    this.seccondaryKeys = new SmallMap();
103    this.depth = 0;
104  }
105  
106  /**
107   * Retrieve the reader that can be safely used to index this file.
108   * 
109   * @return the CountedBufferedReader that should be processed
110   */
111  public CountedBufferedReader getReader() {
112    return reader;
113  }
114  
115  /**
116   * <p>
117   * Set the tag to use as a primary key in the index.
118   * </p>
119   *
120   * <p>
121   * Whenever a value for the primary key tag is seen, this is passed to the
122   * indexer as the primary key for indexing.
123   * </p>
124   *
125   * <p>
126   * Primary keys must be unique between entries, and each entry must provide
127   * exactly one primary key value.
128   * </p>
129   *
130   * @param primaryKeyName the tag to use as primary key
131   */
132  public void setPrimaryKeyName(String primaryKeyName) {
133    this.primaryKeyName = primaryKeyName;
134  }
135  
136  /**
137   * Retrieve the tag currently used as primary key.
138   *
139   * @return a String representing the primary key name
140   */
141  public String getPrimaryKeyName() {
142    return primaryKeyName;
143  }
144  
145  /**
146   * <p>
147   * Add a secondary key.
148   * </p>
149   *
150   * <p>
151   * Secondary keys are potentially non-unique properties of the entries being
152   * indexed. Multiple records can use the same secondary key values, and a
153   * single record can have multiple values for a secondary key.
154   * </p>
155   *
156   * @param secKeyName  the name of the secondary key to add
157   */
158  public void addSecondaryKey(String secKeyName) {
159    seccondaryKeys.put(secKeyName, new ArrayList());
160  }
161  
162  /**
163   * Remove a secondary key.
164   *
165   * @param secKeyName  the name of the secondary key to remove
166   */
167  public void removeSecondaryKey(String secKeyName) {
168    seccondaryKeys.remove(secKeyName);
169  }
170  
171  public void startRecord() {
172    if(depth == 0) {
173      offset = reader.getFilePointer();
174      primaryKey = null;
175      for(Iterator i = seccondaryKeys.values().iterator(); i.hasNext(); ) {
176        List list = (List) i.next();
177        list.clear();
178      }
179    }
180    
181    depth++;
182  }
183  
184  public void startTag(Object tag) {
185    this.tag = tag;
186  }
187  
188  public void value(TagValueContext ctxt, Object value) {
189    if(tag.equals(primaryKeyName)) {
190      primaryKey = value.toString();
191    }
192    
193    List l = (List) seccondaryKeys.get(tag);
194    if(l != null) {
195      l.add(value.toString());
196    }
197  }
198  
199  public void endTag() {}
200  
201  public void endRecord()
202  throws ParserException
203  {
204    depth--;
205    if(depth == 0) {
206      if(primaryKey == null) {
207        throw new NullPointerException("No primary key");
208      }
209
210      int length = (int) (reader.getFilePointer() - offset);
211      indexStore.writeRecord(
212        file,
213        offset,
214        length,
215        primaryKey,
216        seccondaryKeys
217      );
218    }
219  }
220}
221