001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.tagvalue;
023
024import java.io.File;
025import java.io.FileNotFoundException;
026import java.io.FileReader;
027import java.util.Iterator;
028import java.util.Map;
029import java.util.Set;
030import java.util.Stack;
031
032import org.biojava.bio.program.indexdb.IndexStore;
033import org.biojava.utils.ParserException;
034import org.biojava.utils.SmallMap;
035import org.biojava.utils.SmallSet;
036import org.biojava.utils.io.CountedBufferedReader;
037import org.biojava.utils.io.RAF;
038
039/**
040 * <p>
041 * Listens to tag-value events and passes on indexing events to an IndexStore.
042 * This is an update to Indexer that understands that indexed properties may
043 * not be at the top level.
044 * </p>
045 *
046 * <p>
047 * This class is provided to allow the indexing of arbitrary record-based text
048 * files. Indexer objects are built for a single file and the indexes are
049 * written to a single index store. To keep all of the reader offsets in sync
050 * with one another, you will almost certainly wish to use the getReader()
051 * method to retrieve a CountedBufferedReader instance if you want to read the
052 * byte-offset between calls to Parser.read(). Below is an example of how to
053 * index a file.
054 * </p>
055 *
056 * <pre>
057 * File fileToIndex; // get this from somewhere
058 *
059 * // don't forget to register all the apropreate keys to the factory first.
060 * BioIndexStore indexStore = bioIndxStrFact.createBioStore();
061 *
062 * Indexer indexer = new Indexer(fileToIndex, indexStore);
063 * indexer.setPrimaryKeyName("foo", new String[] { "foo" });
064 * indexer.addSecondaryKey("bar", new String[] { "x", "y", "bar"});
065 * indexer.addSecondaryKey("baz", new String[] { "z" });
066 *
067 * TagValueParser tvParser; // make this appropriate for your format
068 * TagValueListener listener; // make this appropriate for your format
069 *                            // and forward all events to indexer
070 * 
071 * Parser parser = new Parser();
072 * while(
073 *   parser.read(indexer.getReader(), tvParser, listener)
074 * ) {
075 *   System.out.print(".");
076 * }
077 * </pre>
078 *
079 * @since 1.2
080 * @author Matthew Pocock
081 */
082public class Indexer2
083implements TagValueListener {
084  private final String primaryKeyName;
085  private final RAF file;
086  private final CountedBufferedReader reader;
087  private final IndexStore indexStore;
088  private final Map keys;
089  private final Map keyValues;
090  private Object tag;
091  private long offset;
092  private int depth;
093  private Stack stack;
094  
095  /**
096   * Build a new Indexer.
097   *
098   * @param file  the file to be processed
099   * @param indexStore  the IndexStore to write to
100   */
101  public Indexer2(File file, IndexStore indexStore, Index2Model model)
102  throws FileNotFoundException {
103    this.file = new RAF(file, "r");
104    this.reader = new CountedBufferedReader(new FileReader(file));
105    this.indexStore = indexStore;
106    this.keyValues = new SmallMap();
107    this.depth = 0;
108    this.stack = new Stack();
109    
110    this.keys = new SmallMap();
111    for(Iterator i = model.getKeys().iterator(); i.hasNext(); ) {
112      String key = (String) i.next();
113      Object val = model.getKeyPath(key);
114      
115      keys.put(val, key);
116    }
117    this.primaryKeyName = model.getPrimaryKeyName();
118  }
119  
120  /**
121   * Retrieve the reader that can be safely used to index this file.
122   * 
123   * @return the CountedBufferedReader that should be processed
124   */
125  public CountedBufferedReader getReader() {
126    return reader;
127  }
128  
129  public void startRecord() {
130    if(depth == 0) {
131      offset = reader.getFilePointer();
132      
133      Frame frame = new Frame();
134      
135      for(Iterator ki = keys.keySet().iterator(); ki.hasNext(); ) {
136        Object[] keyPath = (Object[]) ki.next();
137        if(keyPath.length == 1) {
138          frame.addKey(keyPath);
139        } else {
140          frame.paths.add(keyPath);
141        }
142      }
143      
144      stack.push(frame);
145    } else {
146      Frame top = (Frame) stack.peek();
147      Frame frame = new Frame();
148      
149      //System.out.println("Tag: " + tag);
150      //System.out.println("Deth: " + depth);
151      //System.out.println("Top: " + top);
152
153      for(Iterator ki = top.paths.iterator(); ki.hasNext(); ) {
154        Object[] keyPath = (Object[]) ki.next();
155        if(keyPath[depth-1].equals(tag)) {
156          if((keyPath.length-1) == depth) {
157            frame.addKey(keyPath);
158          } else {
159            frame.paths.add(keyPath);
160          }
161        }
162      }
163      //System.out.println("Pushing new stack frame: " + top + " <- " + frame);
164      stack.push(frame);
165    }
166    
167    depth++;
168  }
169  
170  public void startTag(Object tag) {
171    this.tag = tag;
172    //if(depth >= 2) System.out.println("tag: " + tag);
173  }
174  
175  public void value(TagValueContext ctxt, Object value) {
176    Frame frame = (Frame) stack.peek();
177    Object[] keyPath = (Object []) frame.getKeyPath(tag);
178    if(keyPath != null) {
179      //if(depth >= 2) System.out.println("Interested in: " + tag + " -> " + value);
180      KeyState ks = (KeyState) keyValues.get(keyPath);
181      if(ks == null) {
182        //if(depth >= 2) System.out.println("Allocating stoorage");
183        keyValues.put(keyPath, ks = new KeyState(keys.get(keyPath).toString()));
184      }
185      ks.values.add(value);
186      //if(depth >= 2) System.out.println(keyValues);
187    }
188  }
189  
190  public void endTag() {}
191  
192  public void endRecord()
193  throws ParserException
194  {
195    depth--;
196    if(depth == 0) {
197      int length = (int) (reader.getFilePointer() - offset);
198
199      //System.out.println("keyValues: " + keyValues);
200      String primaryKeyValue = null;
201      Map secKeys = new SmallMap();
202      for(Iterator i = keyValues.keySet().iterator(); i.hasNext(); ) {
203        Object key = i.next();
204        KeyState ks = (KeyState) keyValues.get(key);
205        if(ks.keyName.equals(primaryKeyName)) {
206          if(ks.values.size() != 1) {
207            throw new ParserException(
208              "There must be exactly one value for the primary key: " +
209              primaryKeyName + " - " + ks.values
210            );
211          }
212          primaryKeyValue = ks.values.iterator().next().toString();
213        } else {
214          secKeys.put(ks.keyName, ks.values);
215        }
216      }
217      
218      if(primaryKeyValue == null) {
219        throw new NullPointerException("No primary key");
220      }
221
222      //System.out.println("Primary: " + primaryKeyValue);
223      //System.out.println("Secondaries: " + secKeys);
224
225      indexStore.writeRecord(
226        file,
227        offset,
228        length,
229        primaryKeyValue,
230        secKeys
231      );
232      
233      stack.clear();
234      for(Iterator i = keyValues.values().iterator(); i.hasNext(); ) {
235        KeyState ks = (KeyState) i.next();
236        ks.values.clear();
237      }
238    } else {
239      stack.pop();
240    }
241  }
242  
243  private static class Frame {
244    public final Map keys = new SmallMap();
245    public final Set paths = new SmallSet();
246    
247    public void addKey(Object[] keyPath) {
248      keys.put(keyPath[keyPath.length - 1], keyPath);
249    }
250    
251    
252    public Object[] getKeyPath(Object tag) {
253      return (Object []) keys.get(tag);
254    }
255
256    public String toString() {
257      return this.getClass() + ": (" + keys + "\t" + paths + " )";
258    }
259  }
260  
261  private static class KeyState {
262    public final String keyName;
263    public final Set values = new SmallSet();
264    
265    public KeyState(String keyName) {
266      this.keyName = keyName;
267    }
268
269    public String toString() {
270      return this.getClass() + ": (" + keyName + " " + values + ")";
271    }
272  }
273}
274