001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.symbol; 024 025import java.io.ObjectStreamException; 026import java.io.Serializable; 027import java.util.HashMap; 028import java.util.HashSet; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032import java.util.NoSuchElementException; 033import java.util.Set; 034 035import org.biojava.bio.Annotation; 036import org.biojava.bio.BioException; 037import org.biojava.bio.seq.io.CrossProductTokenization; 038import org.biojava.bio.seq.io.NameTokenization; 039import org.biojava.bio.seq.io.SymbolTokenization; 040import org.biojava.utils.AbstractChangeable; 041import org.biojava.utils.ChangeEvent; 042import org.biojava.utils.ChangeSupport; 043import org.biojava.utils.ChangeVetoException; 044 045/** 046 * <p> 047 * An abstract implementation of <code>Alphabet</code>. 048 * </p> 049 * 050 * <p> 051 * This provides the frame-work for maintaining the SymbolParser <-> name 052 * mappings and also for the ChangeListeners. 053 * </p> 054 * 055 * <p> 056 * This class is for developers to derive from, not for use directly. 057 * </p> 058 * 059 * @author Matthew Pocock 060 * @author Greg Cox 061 * @author Francois Pepin 062 * @author Mark Schreiber 063 * @since 1.1 064 */ 065public abstract class AbstractAlphabet 066 extends 067 AbstractChangeable 068 implements 069 FiniteAlphabet, 070 Serializable 071{ 072 private final Map tokenizationsByName; 073 private final Map ambCache; 074 public static final long serialVersionUID = -3043128839927615753l; 075 076 /* 077 * this field records if the alpha has been registered with the AlphabetManager 078 * in the current VM. It is important for serialization, if the alpha has been registered 079 * it needs to be registered at the other end. 080 */ 081 private boolean registered; 082 083 /** 084 * Used by the AlphabetManager to inform an Alphabet that is has been 085 * registered and that is should be registered if it is transported to a new 086 * AlphabetManager on another VM 087 */ 088 void setRegistered(boolean value){ 089 this.registered = value; 090 } 091 boolean getRegsitered(){ 092 return(registered); 093 } 094 095 096 097 { 098 tokenizationsByName = new HashMap(); 099 ambCache = new HashMap(); 100 registered = false; 101 } 102 103 104 /** 105 * To prevent duplication of a what should be a 106 * single instance of an existing alphabet. This method 107 * was written as protected so that subclasses even from 108 * other packages will inherit it. It should only be overridden 109 * with care. 110 */ 111 protected Object readResolve() throws ObjectStreamException{ 112 113 if(AlphabetManager.registered(this.getName())){ 114 return AlphabetManager.alphabetForName(this.getName()); 115 }else{ 116 if(this.registered){ 117 AlphabetManager.registerAlphabet(this.getName(), this); 118 } 119 return this; 120 } 121 } 122 123 /** 124 * <p> 125 * Assigns a symbol parser to a String object. 126 * </p> 127 * 128 * <p> 129 * Afterwards, the parser can be retrieved using the 130 * getTokenization(Sting name) method. 131 * </p> 132 */ 133 public void putTokenization(String name, SymbolTokenization parser) { 134 tokenizationsByName.put(name, parser); 135 } 136 137 public SymbolTokenization getTokenization(String name) 138 throws NoSuchElementException, BioException 139 { 140 SymbolTokenization toke = (SymbolTokenization) tokenizationsByName.get(name); 141 if(toke == null) { 142 if(name.equals("name")) { 143 if (getAlphabets().size() == 1) { 144 toke = new NameTokenization(this); 145 } else { 146 toke = new CrossProductTokenization(this); 147 } 148 putTokenization(name, toke); 149 } else if (name.equals("default")) { 150 151 if (tokenizationsByName.containsKey("token")) 152 toke= (SymbolTokenization)tokenizationsByName.get("token"); 153 else 154 toke= (SymbolTokenization)getTokenization("name"); 155 putTokenization(name, toke); 156 157 } 158 else 159 { 160 throw new NoSuchElementException("There is no tokenization '" + name + 161 "' defined in alphabet " + getName()); 162 } 163 } 164 return toke; 165 } 166 167 public final Symbol getAmbiguity(Set syms) 168 throws IllegalSymbolException 169 { 170 if (syms.size() == 0) { 171 return getGapSymbol(); 172 } else if (syms.size() == 1) { 173 Symbol sym = (Symbol) syms.iterator().next(); 174 validate(sym); 175 return sym; 176 } else { 177 Symbol s = (Symbol) ambCache.get(syms); 178 if(s == null) { 179 for (Iterator i = syms.iterator(); i.hasNext(); ) { 180 validate((Symbol) i.next()); 181 } 182 183 184 s = getAmbiguityImpl(syms); 185 ambCache.put(new HashSet(syms), s); 186 } 187 return s; 188 } 189 } 190 191 /** 192 * Backend for getAmbiguity, called when it is actually necessarly to create a new symbol. 193 * By default, calls AlphabetManager.createSymbol. 194 * 195 * @since 1.3 196 */ 197 198 protected Symbol getAmbiguityImpl(Set syms) 199 throws IllegalSymbolException 200 { 201 return AlphabetManager.createSymbol( 202 Annotation.EMPTY_ANNOTATION, 203 syms, this 204 ); 205 } 206 207 public final Symbol getSymbol(List syms) 208 throws IllegalSymbolException { 209 if (syms.size() == 1) { 210 Symbol s = (Symbol) syms.get(0); 211 validate(s); 212 return s; 213 } 214 215 List alphas = getAlphabets(); 216 217 if(alphas.size() != syms.size()) { 218 throw new IllegalSymbolException( 219 "Can't retrieve symbol as symbol list is the wrong length " + 220 syms.size() + ":" + alphas.size() 221 ); 222 } 223 224 Iterator si = syms.iterator(); 225 int atomic = 0; 226 while(si.hasNext()) { 227 Symbol s = (Symbol) si.next(); 228 //Alphabet a = (Alphabet) ai.next(); 229 //a.validate(s); // very expensive for requent fetches! 230 if(s instanceof AtomicSymbol) { 231 atomic++; 232 } 233 } 234 235 if(atomic == syms.size()) { 236 return getSymbolImpl(syms); 237 } else { 238 return AlphabetManager.createSymbol( 239 Annotation.EMPTY_ANNOTATION, 240 syms, this 241 ); 242 } 243 } 244 245 protected abstract AtomicSymbol getSymbolImpl(List symList) 246 throws IllegalSymbolException; 247 248 protected abstract void addSymbolImpl(AtomicSymbol s) 249 throws IllegalSymbolException, ChangeVetoException; 250 251 public final void addSymbol(Symbol s) 252 throws IllegalSymbolException, ChangeVetoException { 253 if(s == null) { 254 throw new IllegalSymbolException( 255 "You can not add null as a symbol to alphabet " + getName() 256 ); 257 } 258 259 if(hasListeners()) { 260 ChangeSupport cs = getChangeSupport(Alphabet.SYMBOLS); 261 synchronized(cs) { 262 ChangeEvent ce = new ChangeEvent(this, Alphabet.SYMBOLS, s, null); 263 cs.firePreChangeEvent(ce); 264 doAddSymbol(s); 265 cs.firePostChangeEvent(ce); 266 } 267 } else { 268 doAddSymbol(s); 269 } 270 } 271 272 private void doAddSymbol(Symbol s) 273 throws IllegalSymbolException, ChangeVetoException { 274 Alphabet sa = s.getMatches(); 275 if(!(sa instanceof FiniteAlphabet)) { 276 throw new IllegalSymbolException( 277 "Can't add symbol " + s.getName() + 278 " as it matches an infinite number of symbols." 279 ); 280 } else { 281 for(Iterator si = ((FiniteAlphabet) sa).iterator(); si.hasNext(); ) { 282 addSymbolImpl((AtomicSymbol) si.next()); 283 } 284 } 285 } 286 287 public final boolean contains(Symbol sym) { 288 if(sym instanceof AtomicSymbol) { 289 return containsImpl((AtomicSymbol) sym); 290 } else { 291 if(sym == null) { 292 throw new NullPointerException("Symbols can't be null"); 293 } 294 FiniteAlphabet matches = (FiniteAlphabet) sym.getMatches(); 295 if(matches.size() == 0) { 296 //System.out.println("Got empty symbol " + sym.getName()); 297 if(sym.equals(AlphabetManager.getGapSymbol())) { 298 //System.out.println("Global gap symbol"); 299 return true; 300 } else if(sym instanceof BasisSymbol) { 301 if(((BasisSymbol) sym).getSymbols().size() == getAlphabets().size()) { 302 //System.out.println("Basis symbol and the right length"); 303 return true; 304 } 305 } 306 //System.out.println("Empty symbol and not basis - let's accept it."); 307 return true; 308 } 309 for(Iterator i = matches.iterator(); i.hasNext(); ) { 310 AtomicSymbol s = (AtomicSymbol) i.next(); 311 if(!containsImpl(s)) { 312 return false; 313 } 314 } 315 return true; 316 } 317 } 318 319 public final Symbol getGapSymbol() { 320 return AlphabetManager.getGapSymbol(getAlphabets()); 321 } 322 323 public final void validate(Symbol sym) 324 throws IllegalSymbolException { 325 if(!contains(sym)) { 326 throw new IllegalSymbolException( 327 "Symbol " + sym.getName() + " not found in alphabet " + this.getName() 328 ); 329 } 330 } 331 332 protected abstract boolean containsImpl(AtomicSymbol s); 333 334 /* 335 336 public boolean equals(Object o) { 337 if(o == this) { 338 return true; 339 } 340 341 if(!(o instanceof FiniteAlphabet)) { 342 return false; 343 } 344 345 FiniteAlphabet that = (FiniteAlphabet) o; 346 347 if(this.size() != that.size()) { 348 return false; 349 } 350 351 for(Iterator i = that.iterator(); i.hasNext(); ) { 352 if(!this.contains((AtomicSymbol) i.next())) { 353 return false; 354 } 355 } 356 357 return true; 358 } 359 360 */ 361 362 public String toString() { 363 return getName(); 364 } 365 366 protected AbstractAlphabet() {} 367} 368