/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.bio.taxa;


import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.biojava.bio.Annotation;
import org.biojava.utils.ChangeVetoException;

/**
 * Encapsulate the 'EBI' species format used in Embl, Genbank and Swissprot
 * files. The Taxon objects created by this process have the following annotations: <p>
 *
 * <pre>
 * key=PROPERTY_NCBI_TAXON, value=String representation of the NCBI taxon id
 * key=PROPERTY_TAXON_NAMES, value=Map from name-class to name (see ncbi names.dmp)
 * </pre>
 *
 * @author Matthew Pocock
 * @author Len Trigg
 * @deprecated replaced by classes in {@link org.biojavax.bio.taxa org.biojavax.bio.taxa}
 */
public class EbiFormat implements TaxonParser {
  /**
   * Annotation key for the NCBI taxon id. The value is built by concatenating
   * the Class object, so it carries the <code>Class.toString()</code> form of
   * this class followed by <code>":NCBI_TAXON"</code>.
   */
  public static final String PROPERTY_NCBI_TAXON = EbiFormat.class + ":NCBI_TAXON";

  /**
   * Annotation key for the taxon names. Built the same way as
   * {@link #PROPERTY_NCBI_TAXON}, with the suffix <code>":TAXON_NAMES"</code>.
   */
  public static final String PROPERTY_TAXON_NAMES = EbiFormat.class + ":TAXON_NAMES";

  /** The shared instance, created once when the class is initialised. */
  private static final EbiFormat INSTANCE = new EbiFormat();

  /**
   * Returns the shared EbiFormat instance.
   *
   * @return the shared instance, never null
   */
  public static final EbiFormat getInstance() {
    return INSTANCE;
  }

  /**
   * Parses a ';'-separated lineage string, optionally terminated by a '.',
   * into the taxon tree managed by <code>taxonFactory</code>.
   * <p>
   * Starting from the factory's root, each token is matched against the
   * scientific names of the current taxon's children. From the first token
   * that has no matching child onwards, every remaining token (including the
   * last one) is created with a null common name and added as a child of the
   * previous taxon.
   * <p>
   * A string containing exactly one token is not looked up: it is always
   * created and added as a new child of the root. A string containing no
   * tokens yields the root itself.
   *
   * @param taxonFactory  the factory supplying the root and creating new taxa
   * @param taxonString   the lineage to parse, e.g. <code>"A; B; C."</code>
   * @return the taxon for the last token of the lineage
   * @throws ChangeVetoException         declared for the factory's addChild call
   * @throws CircularReferenceException  declared for the factory's addChild call
   */
  public Taxon parse(TaxonFactory taxonFactory, String taxonString)
    throws
      ChangeVetoException,
      CircularReferenceException
  {
    String name = taxonString.trim();
    if (name.endsWith(".")) {
      name = name.substring(0, name.length() - 1);
    }

    Taxon taxon = taxonFactory.getRoot();
    StringTokenizer sTok = new StringTokenizer(name, ";");

    if (sTok.countTokens() == 1) {
      return taxonFactory.addChild(taxon, taxonFactory.createTaxon(name, null));
    }

    // Walk down the existing tree for as long as tokens match a child.
    String unmatched = null;
    while (sTok.hasMoreTokens()) {
      String tok = sTok.nextToken().trim();
      Taxon child = findChildByName(taxon, tok);
      if (child == null) {
        // This token has already been consumed from the tokenizer, so it
        // must be remembered and added before any of the remaining tokens.
        unmatched = tok;
        break;
      }
      taxon = child;
    }

    // Create the part of the lineage that is not in the tree yet. The
    // pending token is added unconditionally so that the final token of the
    // lineage is not lost when no further tokens remain.
    if (unmatched != null) {
      taxon = taxonFactory.addChild(taxon, taxonFactory.createTaxon(unmatched, null));
      while (sTok.hasMoreTokens()) {
        String tok = sTok.nextToken().trim();
        taxon = taxonFactory.addChild(taxon, taxonFactory.createTaxon(tok, null));
      }
    }

    return taxon;
  }

  /**
   * Finds the direct child of <code>parent</code> whose scientific name
   * equals <code>name</code>.
   *
   * @param parent  the taxon whose children are searched
   * @param name    the scientific name to look for, not null
   * @return the first matching child, or null if there is none
   */
  private static Taxon findChildByName(Taxon parent, String name) {
    for (Iterator i = parent.getChildren().iterator(); i.hasNext(); ) {
      Taxon child = (Taxon) i.next();
      // name is the receiver so a child without a scientific name is
      // skipped instead of raising a NullPointerException
      if (name.equals(child.getScientificName())) {
        return child;
      }
    }
    return null;
  }

  /**
   * Serializes the lineage of a taxon as scientific names separated by
   * <code>"; "</code> and terminated by a <code>'.'</code>.
   * <p>
   * The taxon itself is always written. Ancestors are then prepended for as
   * long as they have a parent of their own, so the top-most ancestor (the
   * one without a parent) is left out.
   *
   * @param taxon  the taxon to serialize, not null
   * @return the lineage string, e.g. <code>"A; B; C."</code>
   */
  public String serialize(Taxon taxon) {
    String name = null;

    do {
      String sci = taxon.getScientificName();
      if (name == null) {
        name = sci + ".";
      } else {
        name = sci + "; " + name;
      }
      taxon = taxon.getParent();
    } while (taxon != null && taxon.getParent() != null);

    return name;
  }

  /**
   * Serializes a taxon as its scientific name, followed by the common name
   * in parentheses when that is non-null and non-empty, and terminated by a
   * <code>'.'</code>.
   *
   * @param taxon  the taxon to serialize, not null
   * @return a string of the form <code>"Scientific name (common name)."</code>
   *         or <code>"Scientific name."</code>
   */
  public String serializeSource(Taxon taxon) {
    StringBuffer sb = new StringBuffer(taxon.getScientificName());
    String common = taxon.getCommonName();
    if ((common != null) && (common.length() > 0)) {
      sb.append(" (").append(common).append(")");
    }
    sb.append('.');
    return sb.toString();
  }

  /**
   * Serializes the NCBI taxon id of a taxon as a cross-reference of the form
   * <code>"NCBI_TaxID=&lt;id&gt;;"</code>.
   * <p>
   * The id is read from the taxon's annotation under
   * {@link #PROPERTY_NCBI_TAXON}. If the stored value is a List, its first
   * element is used.
   *
   * @param taxon  the taxon to serialize, not null
   * @return the cross-reference string
   */
  public String serializeXRef(Taxon taxon) {
    Annotation anno = taxon.getAnnotation();
    Object t = anno.getProperty(EbiFormat.PROPERTY_NCBI_TAXON);
    if (t instanceof List) {
      // No cast to String: concatenation below renders any element type.
      t = ((List) t).get(0);
    }
    return "NCBI_TaxID=" + t + ";";
  }
}