/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.bio.seq.io;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.biojava.bio.taxa.CircularReferenceException;
import org.biojava.bio.taxa.EbiFormat;
import org.biojava.bio.taxa.Taxon;
import org.biojava.bio.taxa.TaxonFactory;
import org.biojava.bio.taxa.TaxonParser;
import org.biojava.utils.ChangeVetoException;

/**
 * A parser that is able to generate Taxon entries for sequence
 * builder event streams.
 *
 * <p>
 * The filter intercepts three configurable sequence properties (scientific
 * name, common name and NCBI taxon id), accumulates their values while the
 * sequence is being read, and at {@link #endSequence()} turns them into a
 * single {@link Taxon} which is passed to the delegate under
 * {@link #PROPERTY_ORGANISM}. All other properties are forwarded to the
 * delegate unchanged.
 * </p>
 *
 * @author Matthew Pocock
 * @deprecated Use org.biojavax.bio.taxa framework instead
 */
public class OrganismParser
  extends
    SequenceBuilderFilter
//  implements
//    ParseErrorSource
{
  /**
   * Property key under which the assembled {@link Taxon} is handed to the
   * delegate builder.
   *
   * <p>
   * The value is built by concatenating the <code>Class</code> object itself
   * (not its name), so it carries the <code>"class "</code> prefix produced by
   * <code>Class.toString()</code>. Existing consumers look the property up via
   * this constant, so the expression is intentionally left as it is.
   * </p>
   */
  public static final String PROPERTY_ORGANISM = OrganismParser.class + ":organism";

  /**
   * Factory which wraps SequenceBuilders in an OrganismParser.
   *
   * @author Matthew Pocock
   */
  public static class Factory
    implements
      SequenceBuilderFactory,
      Serializable
  {
    private SequenceBuilderFactory delegateFactory;
    private String sciNameKey;
    private String commonNameKey;
    private String ncbiTaxonKey;
    private TaxonFactory taxonFactory;
    private TaxonParser taxonParser;

    /**
     * Create a factory whose builders are OrganismParser instances wrapped
     * around the builders produced by <code>delegateFactory</code>.
     *
     * @param delegateFactory  factory supplying the builder to be wrapped
     * @param taxonFactory     passed to the taxon parser when a Taxon is built
     * @param taxonParser      parser applied to the accumulated scientific name
     * @param sciNameKey       property key whose values form the scientific name
     * @param commonNameKey    property key whose value is the common name
     * @param ncbiTaxonKey     property key whose values hold NCBI taxon ids
     */
    public Factory(
      SequenceBuilderFactory delegateFactory,
      TaxonFactory taxonFactory,
      TaxonParser taxonParser,
      String sciNameKey,
      String commonNameKey,
      String ncbiTaxonKey
    ) {
      this.delegateFactory = delegateFactory;
      this.taxonFactory = taxonFactory;
      this.taxonParser = taxonParser;
      this.sciNameKey = sciNameKey;
      this.commonNameKey = commonNameKey;
      this.ncbiTaxonKey = ncbiTaxonKey;
    }

    /**
     * Ask the delegate factory for a new builder and wrap it in an
     * OrganismParser configured with this factory's settings.
     *
     * @return a new OrganismParser around a freshly made delegate builder
     */
    public SequenceBuilder makeSequenceBuilder() {
      return new OrganismParser(
        delegateFactory.makeSequenceBuilder(),
        taxonFactory,
        taxonParser,
        sciNameKey,
        commonNameKey,
        ncbiTaxonKey
      );
    }
  }

  private final TaxonFactory taxonFactory;
  private final TaxonParser taxonParser;
  private final String sciNameKey;
  private final String commonNameKey;
  private final String ncbiTaxonKey;

  // Per-sequence state, filled by addSequenceProperty() and consumed (then
  // cleared) by endSequence(). Each stays null until its property is seen.
  private String fullName;
  private String commonName;
  private String ncbiTaxon;

  /**
   * Wrap <code>delegate</code> so that organism-related properties are
   * collected into a Taxon instead of being forwarded individually.
   *
   * @param delegate       builder that receives all other events and,
   *                       at the end of the sequence, the Taxon
   * @param taxonFactory   passed to the taxon parser when a Taxon is built
   * @param taxonParser    parser applied to the accumulated scientific name
   * @param sciNameKey     property key whose values form the scientific name
   * @param commonNameKey  property key whose value is the common name
   * @param ncbiTaxonKey   property key whose values hold NCBI taxon ids
   */
  public OrganismParser(
    SequenceBuilder delegate,
    TaxonFactory taxonFactory,
    TaxonParser taxonParser,
    String sciNameKey,
    String commonNameKey,
    String ncbiTaxonKey
  ) {
    super(delegate);
    this.taxonFactory = taxonFactory;
    this.taxonParser = taxonParser;
    this.sciNameKey = sciNameKey;
    this.commonNameKey = commonNameKey;
    this.ncbiTaxonKey = ncbiTaxonKey;
  }

  /**
   * Capture organism-related properties and forward everything else.
   *
   * <ul>
   * <li>Scientific-name values are joined with a single space, in the order
   *     they arrive.</li>
   * <li>The common name is overwritten by each new value.</li>
   * <li>NCBI taxon values are reduced to the text after the first
   *     <code>=</code> and before the following <code>;</code>, then appended
   *     to what has been collected so far with no separator added.</li>
   * <li>Any other key is passed to the delegate untouched.</li>
   * </ul>
   *
   * @param sciNameKey  the key of the incoming property. Note that this
   *                    parameter shadows the field of the same name; the
   *                    field is always referred to as
   *                    <code>this.sciNameKey</code> below.
   * @param value       the property value
   * @throws ParseException if the delegate rejects a forwarded property
   */
  public void addSequenceProperty(Object sciNameKey, Object value)
  throws
    ParseException
  {
    if(this.sciNameKey.equals(sciNameKey)) {
      if(fullName == null) {
        fullName = value.toString();
      } else {
        fullName = fullName + " " + value;
      }
    } else if(this.commonNameKey.equals(sciNameKey)) {
      commonName = value.toString();
    } else if(this.ncbiTaxonKey.equals(sciNameKey)) {
      String tid = value.toString();
      // Drop everything up to and including the first '=' (the label part).
      int eq = tid.indexOf("=");
      if(eq >= 0) {
        tid = tid.substring(eq + 1);
      }
      // Drop the first ';' and anything after it.
      int sc = tid.indexOf(";");
      if(sc >= 0) {
        tid = tid.substring(0, sc);
      }
      if(this.ncbiTaxon == null) {
        this.ncbiTaxon = tid;
      } else {
        // No separator is inserted here; endSequence() splits on ',' only.
        this.ncbiTaxon = this.ncbiTaxon + tid;
      }
    } else {
      getDelegate().addSequenceProperty(sciNameKey, value);
    }
  }

  /**
   * Build a Taxon from the collected organism properties and pass it to the
   * delegate under {@link #PROPERTY_ORGANISM}.
   *
   * <p>
   * If no scientific name was collected, no Taxon is built and nothing is
   * passed on. If no NCBI taxon id was collected, the Taxon is passed on
   * without the {@link EbiFormat#PROPERTY_NCBI_TAXON} annotation. The
   * collected values are cleared before this method returns, whether or not
   * it succeeds, so that a reused instance starts the next sequence empty.
   * </p>
   *
   * <p>
   * NOTE(review): this override does not call
   * <code>getDelegate().endSequence()</code>. Confirm against
   * SequenceBuilderFilter whether the delegate is expected to receive the
   * end-of-sequence event from here.
   * </p>
   *
   * @throws ParseException if the Taxon cannot be built or annotated, or if
   *                        the delegate rejects the organism property
   */
  public void endSequence()
  throws
    ParseException
  {
    try {
      // Without a scientific name there is nothing to give the taxon parser.
      if(fullName == null) {
        return;
      }

      Taxon taxon = taxonParser.parse(taxonFactory, fullName);
      if(commonName != null && taxon.getCommonName() == null) {
        try {
          taxon.setCommonName(commonName);
        } catch (ChangeVetoException cve) {
          throw new ParseException(cve, "Failed to build Taxon");
        }
      }

      // StringTokenizer throws NullPointerException for a null string, so
      // only annotate when at least one NCBI taxon property was seen.
      if(ncbiTaxon != null) {
        StringTokenizer stok = new StringTokenizer(ncbiTaxon, ",");
        if(stok.countTokens() == 1) {
          // Exactly one id: store the collected string itself.
          taxon.getAnnotation().setProperty(EbiFormat.PROPERTY_NCBI_TAXON, ncbiTaxon);
        } else {
          // Zero or several ids: store them as a list of tokens.
          List tl = new ArrayList();
          while(stok.hasMoreTokens()) {
            tl.add(stok.nextToken());
          }
          taxon.getAnnotation().setProperty(EbiFormat.PROPERTY_NCBI_TAXON, tl);
        }
      }

      getDelegate().addSequenceProperty(PROPERTY_ORGANISM, taxon);
    } catch (ChangeVetoException cve) {
      throw new ParseException(cve, "Could not parse organism: " + fullName);
    } catch (CircularReferenceException cre) {
      throw new ParseException(cre);
    } finally {
      // Runs after any exception message above has been built from fullName.
      fullName = null;
      commonName = null;
      ncbiTaxon = null;
    }
  }
}