/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


package org.biojava.bio.seq.io;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;
import org.biojava.utils.ChangeListener;
import org.biojava.utils.ChangeType;

/**
 * Simple implementation of SymbolTokenization which uses the `name'
 * field of the symbols.  This class works with any non-cross-product
 * FiniteAlphabet, and doesn't need any extra data to be provided.
 *
 * <p>
 * Tokens are resolved through a lazily built name-to-symbol map.  The
 * literal token <code>gap</code> always maps to the alphabet's gap symbol,
 * and a token of the form <code>[...]</code> is parsed as an ambiguity
 * over the symbols named between the brackets.
 * </p>
 *
 * @author Thomas Down
 * @since 1.2
 */

public class NameTokenization extends WordTokenization {
    // Lazily built by getNameToSymbol(); transient, so it is never
    // serialized and is simply rebuilt on first use.
    private transient Map nameToSymbol = null;
    private boolean caseSensitive;

    /**
     * Construct a new NameTokenization over the given alphabet.
     *
     * <p>
     * An always-veto listener is registered on the alphabet for
     * <code>ChangeType.UNKNOWN</code>; presumably this blocks alphabet
     * changes that would invalidate the cached name map.  The listener is
     * removed again in {@link #finalize()}.
     * </p>
     *
     * @param fab            the alphabet whose symbol names are used as tokens
     * @param caseSensitive  <code>true</code> to match names exactly,
     *                       <code>false</code> to compare them in lower case
     */
    public NameTokenization(FiniteAlphabet fab, boolean caseSensitive) {
        super(fab);
        fab.addChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN);
        this.caseSensitive = caseSensitive;
    }

    /**
     * Construct a new NameTokenization, defaulting to case-insensitive.
     *
     * @param fab  the alphabet whose symbol names are used as tokens
     */
    public NameTokenization(FiniteAlphabet fab) {
        this(fab, false);
    }

    /**
     * Removes the veto listener that the constructor registered on the
     * alphabet.
     *
     * @throws Throwable  anything thrown by the superclass finalizer
     */
    protected void finalize() throws Throwable {
        try {
            super.finalize();
        } finally {
            // Deregister even when the superclass finalizer throws, so the
            // alphabet is not left with a stale veto registration.
            getAlphabet().removeChangeListener(ChangeListener.ALWAYS_VETO, ChangeType.UNKNOWN);
        }
    }

    /**
     * Maps a symbol name or an incoming token onto the key form used by the
     * lookup table.
     *
     * @param name  the text to normalize; must not be null
     * @return      <code>name</code> itself when case-sensitive, otherwise
     *              its lower-case form
     */
    private String toKey(String name) {
        if (caseSensitive) {
            return name;
        }
        // A fixed locale keeps case folding independent of the JVM default
        // locale (under Turkish rules "I" does not lower-case to "i").
        return name.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Returns the name-to-symbol lookup table, building it on first use.
     *
     * <p>
     * Keys are the symbol names (lower-cased unless this tokenization is
     * case-sensitive) plus the literal key <code>gap</code> for the
     * alphabet's gap symbol.  The returned map is the internal instance,
     * not a copy.
     * </p>
     *
     * @return  the map from name keys to Symbol objects
     */
    protected Map getNameToSymbol() {
        if (nameToSymbol == null) {
            nameToSymbol = new HashMap();
            for (Iterator i = ((FiniteAlphabet) getAlphabet()).iterator(); i.hasNext(); ) {
                Symbol sym = (Symbol) i.next();
                nameToSymbol.put(toKey(sym.getName()), sym);
            }
            // Added last, so "gap" resolves to the gap symbol even if an
            // alphabet symbol happens to share that key.
            nameToSymbol.put("gap", getAlphabet().getGapSymbol());
        }

        return nameToSymbol;
    }

    /**
     * Resolves a single token to a symbol.
     *
     * <p>
     * The token is first looked up by name.  If that fails and the token
     * starts with <code>[</code>, it must also end with <code>]</code>; the
     * text between the brackets is passed to <code>parseString</code> and
     * the resulting symbols are combined into an ambiguity symbol.
     * </p>
     *
     * @param token  the token to resolve; must not be null
     * @return       the matching symbol, or an ambiguity symbol for a
     *               bracketed token
     * @throws IllegalSymbolException  if the token is empty, names no symbol
     *                                 in the alphabet, or opens a bracket
     *                                 without closing it
     */
    public Symbol parseToken(String token)
        throws IllegalSymbolException
    {
        Symbol sym = (Symbol) getNameToSymbol().get(toKey(token));
        if (sym != null) {
            return sym;
        }

        // The length check keeps an empty token from reaching charAt(0),
        // which would raise an unchecked StringIndexOutOfBoundsException;
        // it is reported as an unknown token instead.
        if (token.length() > 0 && token.charAt(0) == '[') {
            if (token.charAt(token.length() - 1) != ']') {
                throw new IllegalSymbolException("Mismatched parentheses: " + token);
            }
            Symbol[] syms = parseString(token.substring(1, token.length() - 1));
            Set ambigSet = new HashSet();
            for (int i = 0; i < syms.length; ++i) {
                ambigSet.add(syms[i]);
            }
            return getAlphabet().getAmbiguity(ambigSet);
        }

        throw new IllegalSymbolException("Token `" + token + "' does not appear as a named symbol in alphabet `" + getAlphabet().getName() + "'");
    }

    /**
     * Returns the token for a symbol, which is simply the symbol's name.
     *
     * @param s  the symbol to tokenize
     * @return   the name of <code>s</code>
     * @throws IllegalSymbolException  if the alphabet's validation rejects
     *                                 <code>s</code>
     */
    public String tokenizeSymbol(Symbol s) throws IllegalSymbolException {
        getAlphabet().validate(s);
        return s.getName();
    }
}