001package org.biojava.bio.program.tagvalue;
002
003import org.biojava.utils.ParserException;
004
005/**
006 * Partician multiple values for a tag into their own tag groups.
007 *
008 * <p>With tag-value files, it is not uncommon for information about logical
009 * blocks of data to be encoded in the values as well as tags of the document.
010 * For example, in swissprot entries, the comment block may be punctuated by
011 * lines that clearly seperate one logical comment from another, but there may
012 * be no change in the pattern of tags to indicate this. Some fields, such as
013 * alternate names in Enzyme, are a ist of values but some of the values are
014 * longer than a single line. Each value is terminated with a period ".", but
015 * again there is no way from the tags to know the logical grouping.</p>
016 *
017 * <p>This class provides callbacks to allow event streams to be re-written
018 * so that they contain this information. A single CC tag with multiple values
019 * can be re-written as multiple CC tags with values for each logical comment.
020 * This is done by presenting each value to an instance of Agregator.Observer
021 * that indicates if the current value signals the end of a logcal block.
022 * </p>
023 *
024 * @since 1.4
025 * @author Matthew Pocock
026 */
027public class MultiTagger extends SimpleTagValueWrapper {
028  private final BoundaryFinder observer;
029
030  // state
031  //
032  boolean inTag;
033  boolean seenValues;
034  Object tag;
035
036  public MultiTagger(TagValueListener listener, BoundaryFinder observer) {
037    super(listener);
038    this.observer = observer;
039  }
040
041  public BoundaryFinder getBoundaryFinder() {
042    return observer;
043  }
044  
045  public void startTag(Object tag)
046  throws ParserException {
047    this.tag = tag;
048    inTag = false;
049    seenValues = false;
050  }
051
052  public void value(TagValueContext ctxt, Object value)
053  throws ParserException {
054    seenValues = true;
055
056    if(observer.isBoundaryStart(value)) {
057      if(inTag) {
058        super.endTag();
059      }
060      super.startTag(tag);
061      if(!observer.dropBoundaryValues()) {
062        super.value(ctxt, value);
063      }
064      inTag = true;
065    } else if(observer.isBoundaryEnd(value)) {
066      if(!inTag) {
067        super.startTag(tag);
068      }
069      if(!observer.dropBoundaryValues()) {
070        super.value(ctxt, value);
071      }
072      super.endTag();
073      inTag = false;
074    } else {
075      if(!inTag) {
076        super.startTag(tag);
077        inTag = true;
078      }
079      super.value(ctxt, value);
080    }
081  }
082
083  public void endTag()
084  throws ParserException {
085    if(inTag) {
086      super.endTag();
087    } else if(!seenValues) {
088      // bounary condition where there are no values associated with a tag
089      super.startTag(tag);
090      super.endTag();
091    }
092    inTag = false;
093    seenValues = false;
094  }
095}