001package org.biojava.bio.program.tagvalue; 002 003import org.biojava.utils.ParserException; 004 005/** 006 * Partician multiple values for a tag into their own tag groups. 007 * 008 * <p>With tag-value files, it is not uncommon for information about logical 009 * blocks of data to be encoded in the values as well as tags of the document. 010 * For example, in swissprot entries, the comment block may be punctuated by 011 * lines that clearly seperate one logical comment from another, but there may 012 * be no change in the pattern of tags to indicate this. Some fields, such as 013 * alternate names in Enzyme, are a ist of values but some of the values are 014 * longer than a single line. Each value is terminated with a period ".", but 015 * again there is no way from the tags to know the logical grouping.</p> 016 * 017 * <p>This class provides callbacks to allow event streams to be re-written 018 * so that they contain this information. A single CC tag with multiple values 019 * can be re-written as multiple CC tags with values for each logical comment. 020 * This is done by presenting each value to an instance of Agregator.Observer 021 * that indicates if the current value signals the end of a logcal block. 022 * </p> 023 * 024 * @since 1.4 025 * @author Matthew Pocock 026 */ 027public class MultiTagger extends SimpleTagValueWrapper { 028 private final BoundaryFinder observer; 029 030 // state 031 // 032 boolean inTag; 033 boolean seenValues; 034 Object tag; 035 036 public MultiTagger(TagValueListener listener, BoundaryFinder observer) { 037 super(listener); 038 this.observer = observer; 039 } 040 041 public BoundaryFinder getBoundaryFinder() { 042 return observer; 043 } 044 045 public void startTag(Object tag) 046 throws ParserException { 047 this.tag = tag; 048 inTag = false; 049 seenValues = false; 050 } 051 052 public void value(TagValueContext ctxt, Object value) 053 throws ParserException { 054 seenValues = true; 055 056 if(observer.isBoundaryStart(value)) { 057 if(inTag) { 058 super.endTag(); 059 } 060 super.startTag(tag); 061 if(!observer.dropBoundaryValues()) { 062 super.value(ctxt, value); 063 } 064 inTag = true; 065 } else if(observer.isBoundaryEnd(value)) { 066 if(!inTag) { 067 super.startTag(tag); 068 } 069 if(!observer.dropBoundaryValues()) { 070 super.value(ctxt, value); 071 } 072 super.endTag(); 073 inTag = false; 074 } else { 075 if(!inTag) { 076 super.startTag(tag); 077 inTag = true; 078 } 079 super.value(ctxt, value); 080 } 081 } 082 083 public void endTag() 084 throws ParserException { 085 if(inTag) { 086 super.endTag(); 087 } else if(!seenValues) { 088 // bounary condition where there are no values associated with a tag 089 super.startTag(tag); 090 super.endTag(); 091 } 092 inTag = false; 093 seenValues = false; 094 } 095}