001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.bio.seq.io;
022
023import java.io.Serializable;
024import java.util.ArrayList;
025import java.util.List;
026import java.util.StringTokenizer;
027
028import org.biojava.bio.taxa.CircularReferenceException;
029import org.biojava.bio.taxa.EbiFormat;
030import org.biojava.bio.taxa.Taxon;
031import org.biojava.bio.taxa.TaxonFactory;
032import org.biojava.bio.taxa.TaxonParser;
033import org.biojava.utils.ChangeVetoException;
034
035/**
036 * A parser that is able to generate Taxon entries for sequence
037 * builder event streams.
038 *
039 * @author Matthew Pocock
040 * @deprecated Use org.biojavax.bio.taxa framework instead
041 */
042public class OrganismParser
043  extends
044    SequenceBuilderFilter
045//  implements
046//    ParseErrorSource
047{
048  public static final String PROPERTY_ORGANISM = OrganismParser.class + ":organism";
049  
050  /**
051   * Factory which wraps SequenceBuilders in an OrganismParser.
052   *
053   * @author Matthew Pocock
054   */
055  public static class Factory
056    implements
057      SequenceBuilderFactory,
058      Serializable
059  {
060    private SequenceBuilderFactory delegateFactory;
061    private String sciNameKey;
062    private String commonNameKey;
063    private String ncbiTaxonKey;
064    private TaxonFactory taxonFactory;
065    private TaxonParser taxonParser;
066    
067    public Factory(
068      SequenceBuilderFactory delegateFactory,
069      TaxonFactory taxonFactory,
070      TaxonParser taxonParser,
071      String sciNameKey,
072      String commonNameKey,
073      String ncbiTaxonKey
074    ) {
075      this.delegateFactory = delegateFactory;
076      this.taxonFactory = taxonFactory;
077      this.taxonParser = taxonParser;
078      this.sciNameKey = sciNameKey;
079      this.commonNameKey = commonNameKey;
080      this.ncbiTaxonKey = ncbiTaxonKey;
081    }
082    
083    public SequenceBuilder makeSequenceBuilder() {
084      return new OrganismParser(
085        delegateFactory.makeSequenceBuilder(),
086        taxonFactory,
087        taxonParser,
088        sciNameKey,
089        commonNameKey,
090        ncbiTaxonKey
091      );
092    }
093  }
094  
095  private final TaxonFactory taxonFactory;
096  private final TaxonParser taxonParser;
097  private final String sciNameKey;
098  private final String commonNameKey;
099  private final String ncbiTaxonKey;
100  private String fullName;
101  private String commonName;
102  private String ncbiTaxon;
103  
104  public OrganismParser(
105    SequenceBuilder delegate,
106    TaxonFactory taxonFactory,
107    TaxonParser taxonParser,
108    String sciNameKey,
109    String commonNameKey,
110    String ncbiTaxonKey
111  ) {
112    super(delegate);
113    this.taxonFactory = taxonFactory;
114    this.taxonParser = taxonParser;
115    this.sciNameKey = sciNameKey;
116    this.commonNameKey = commonNameKey;
117    this.ncbiTaxonKey = ncbiTaxonKey;
118  }
119  
120  public void addSequenceProperty(Object sciNameKey, Object value)
121    throws
122      ParseException
123  {
124    if(this.sciNameKey.equals(sciNameKey)) {
125      if(fullName == null) {
126        fullName = value.toString();
127      } else {
128        fullName = fullName + " " + value;
129      }
130    } else if(this.commonNameKey.equals(sciNameKey)) {
131      commonName = value.toString();
132    } else if(this.ncbiTaxonKey.equals(sciNameKey)) {
133      String tid = value.toString();
134      int eq = tid.indexOf("=");
135      if(eq >= 0) {
136        tid = tid.substring(eq + 1);
137      }
138      int sc = tid.indexOf(";");
139      if(sc >= 0) {
140        tid = tid.substring(0, sc);
141      }
142      if(this.ncbiTaxon == null) {
143        this.ncbiTaxon = tid;
144      } else {
145        this.ncbiTaxon = this.ncbiTaxon + tid;
146      }
147    } else {
148      getDelegate().addSequenceProperty(sciNameKey, value);
149    }
150  }
151  
152  public void endSequence()
153    throws
154      ParseException
155  {
156    try {
157      Taxon taxon = taxonParser.parse(taxonFactory, fullName);
158      if(commonName != null && taxon.getCommonName() == null) {
159        try {
160          taxon.setCommonName(commonName);
161        } catch (ChangeVetoException cve) {
162          throw new ParseException(cve, "Failed to build Taxon");
163        }
164      }
165      StringTokenizer stok = new StringTokenizer(ncbiTaxon, ",");
166      if(stok.countTokens() == 1) {
167        taxon.getAnnotation().setProperty(EbiFormat.PROPERTY_NCBI_TAXON, ncbiTaxon);
168      } else {
169        List tl = new ArrayList();
170        while(stok.hasMoreTokens()) {
171          tl.add(stok.nextToken());
172        }
173        taxon.getAnnotation().setProperty(EbiFormat.PROPERTY_NCBI_TAXON, tl);
174      }
175      getDelegate().addSequenceProperty(PROPERTY_ORGANISM, taxon);
176    } catch (ChangeVetoException cve) {
177      throw new ParseException(cve, "Could not parse organism: " + fullName);
178    } catch (CircularReferenceException cre) {
179      throw new ParseException(cre);
180    }
181  }
182}
183
184