/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


package org.biojava.bio.symbol;

import java.io.NotSerializableException;
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;

import org.biojava.bio.Annotation;
import org.biojava.bio.BioError;
import org.biojava.bio.seq.io.IntegerTokenization;
import org.biojava.bio.seq.io.SubIntegerTokenization;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.SingletonList;
import org.biojava.utils.StaticMemberPlaceHolder;
import org.biojava.utils.Unchangeable;
import org.biojava.utils.cache.WeakValueHashMap;

/**
 * <p>
 * An efficient implementation of an Alphabet over the infinite set of integer
 * values.
 * </p>
 *
 * <p>
 * This class can be used to represent lists of integer numbers as a
 * SymbolList with the alphabet IntegerAlphabet. These lists can then be
 * annotated with features, or fed into dynamic-programming algorithms, or
 * processed as per any other SymbolList object.
 * </p>
 *
 * <p>
 * Object identity should be used to decide if two IntegerSymbol objects are
 * the same. IntegerAlphabet ensures that all IntegerSymbol instances are
 * canonicalized.
 * </p>
 *
 * @author Matthew Pocock
 * @author Mark Schreiber
 * @author Thomas Down
 */

public final class IntegerAlphabet
  extends
    Unchangeable
  implements
    Alphabet,
    Serializable
{
  /**
   * The singleton instance of the IntegerAlphabet class.
   * <p>
   * This field is populated lazily by {@link #getInstance()} and is
   * <code>null</code> until that method has been called at least once, so
   * callers should use {@link #getInstance()} rather than read the field
   * directly. It has to stay a public static field because
   * {@link #writeReplace()} looks it up reflectively by name.
   */
  public static IntegerAlphabet INSTANCE;

  /**
   * Serialization hook: substitutes a place-holder that refers to the static
   * {@link #INSTANCE} field for this object in the serialized stream.
   *
   * @return a StaticMemberPlaceHolder wrapping the <code>INSTANCE</code> field
   * @throws ObjectStreamException (a NotSerializableException) if the
   *         <code>INSTANCE</code> field cannot be found reflectively
   */
  private Object writeReplace() throws ObjectStreamException {
    try {
      return new StaticMemberPlaceHolder(IntegerAlphabet.class.getField("INSTANCE"));
    } catch (NoSuchFieldException nsfe) {
      // NotSerializableException has no (message, cause) constructor, so the
      // cause is attached afterwards to keep the original stack trace.
      NotSerializableException nse = new NotSerializableException(nsfe.getMessage());
      nse.initCause(nsfe);
      throw nse;
    }
  }

  /**
   * Construct a finite contiguous subset of the <code>IntegerAlphabet</code>.
   * Useful for making CrossProductAlphabets with other <code>FiniteAlphabet</code>s.
   * <p>
   * Sub-alphabets are registered with the AlphabetManager under the name
   * <code>SUBINTEGER[min..max]</code>; if an alphabet is already registered
   * under that name it is returned instead of creating a new one.
   *
   * @param min the lower bound of the Alphabet
   * @param max the upper bound of the Alphabet
   * @throws IllegalArgumentException if max &lt; min
   * @return A FiniteAlphabet from min to max <b>inclusive</b>.
   */
  public static SubIntegerAlphabet getSubAlphabet(int min, int max)
  throws IllegalArgumentException {
    String name = "SUBINTEGER["+min+".."+max+"]";
    if(AlphabetManager.registered(name)){
      return (SubIntegerAlphabet) (AlphabetManager.alphabetForName(name));
    }

    // The constructor rejects max < min before anything is registered.
    FiniteAlphabet a = new SubIntegerAlphabet(min, max);
    AlphabetManager.registerAlphabet(a.getName(),a);

    return (SubIntegerAlphabet) (AlphabetManager.alphabetForName(name));
  }

  /**
   * Retrieve a SymbolList view of an array of integers.
   * <p>
   * The returned object is a view onto the underlying array, and does not copy
   * it. Changes made to the original array will alter the resulting SymbolList.
   *
   * @param iArray  the array of integers to view
   * @return a SymbolList over the IntegerAlphabet that represent the values in
   *         iArray
   */
  public static SymbolList fromArray(int [] iArray) {
    return new IntegerArray(iArray);
  }

  /**
   * Retrieve the single IntegerAlphabet instance.
   * <p>
   * The instance is created on the first call, at which point it is also
   * registered with the AlphabetManager under the alias
   * "Alphabet of all integers.". The lazy initialisation is not synchronized.
   *
   * @return the singleton IntegerAlphabet instance
   */
  public static IntegerAlphabet getInstance() {
    if(INSTANCE == null) {
      INSTANCE = new IntegerAlphabet();
      //add an alias
      AlphabetManager.registerAlphabet("Alphabet of all integers.", INSTANCE);
    }

    return INSTANCE;
  }

  /**
   * Canonicalization map for ints and references to symbols.
   */
  private WeakValueHashMap intToSym;

  /**
   * Private so that the only instance is the one made by
   * {@link #getInstance()}.
   */
  private IntegerAlphabet() {
    intToSym = new WeakValueHashMap();
  }

  /**
   * Retrieve the Symbol for an int.
   * <p>
   * Symbols are looked up in the canonicalization map first and only created
   * (and stored) when no entry is present. The method is synchronized so that
   * the lookup and the insertion happen as one step.
   *
   * @param val  the int to view
   * @return a IntegerSymbol embodying val
   */
  public synchronized IntegerSymbol getSymbol(int val) {
    Integer i = new Integer(val);
    IntegerSymbol sym = (IntegerSymbol) intToSym.get(i);
    if(sym == null) {
      sym = new IntegerSymbol(val);
      intToSym.put(i, sym);
    }
    return sym;
  }

  /**
   * Retrieve the gap symbol for this alphabet, as supplied by
   * <code>AlphabetManager.getGapSymbol</code> for {@link #getAlphabets()}.
   *
   * @return the gap Symbol
   */
  public Symbol getGapSymbol() {
    return AlphabetManager.getGapSymbol(getAlphabets());
  }

  /**
   * @return always <code>Annotation.EMPTY_ANNOTATION</code>
   */
  public Annotation getAnnotation() {
    return Annotation.EMPTY_ANNOTATION;
  }

  /**
   * @return a single-element List containing just this alphabet
   */
  public List getAlphabets() {
    return new SingletonList(this);
  }

  /**
   * Not implemented for this alphabet.
   *
   * @param symList  ignored
   * @return never returns normally
   * @throws BioError always
   */
  public Symbol getSymbol(List symList)
  throws IllegalSymbolException {
    throw new BioError("Unimplemneted method");
  }

  /**
   * Not implemented for this alphabet.
   *
   * @param symSet  ignored
   * @return never returns normally
   * @throws BioError always
   */
  public Symbol getAmbiguity(Set symSet)
  throws IllegalSymbolException {
    throw new BioError("Unimplemneted method");
  }

  /**
   * Test whether a symbol belongs to this alphabet.
   *
   * @param s  the Symbol to test
   * @return true if, and only if, s is an IntegerSymbol
   */
  public boolean contains(Symbol s) {
    return s instanceof IntegerSymbol;
  }

  /**
   * Check that a symbol belongs to this alphabet.
   *
   * @param s  the Symbol to check
   * @throws IllegalSymbolException if {@link #contains(Symbol)} is false for s
   */
  public void validate(Symbol s) throws IllegalSymbolException {
    if(!contains(s)) {
      throw new IllegalSymbolException(
        "Only symbols of type IntegerAlphabet.IntegerSymbol are valid for this alphabet.\n" +
        "(" + s.getClass() + ") " + s.getName()
      );
    }
  }

  /**
   * @return the fixed name "INTEGER"
   */
  public String getName() {
    return "INTEGER";
  }

  /**
   * Creates a new parser (Mark Schreiber 3 May 2001).
   *
   * @param name Currently only "token" is supported. You may also
   * use "default" as a synonym of "token"
   * @return a new IntegerTokenization
   * @throws NoSuchElementException if name is anything other than "token" or
   *         "default" (including <code>null</code>)
   */
  public SymbolTokenization getTokenization(String name) {
    // Constant-first comparison so that a null name is reported as an
    // unsupported tokenization rather than as a NullPointerException.
    if("token".equals(name) || "default".equals(name)){
      return new IntegerTokenization();
    }else{
      throw new NoSuchElementException(name + " parser not supported by IntegerAlphabet yet");
    }
  }

  /**
   * A single int value.
   *
   * @author Matthew Pocock
   */
  public static class IntegerSymbol
    extends
      Unchangeable
    implements
      AtomicSymbol,
      Serializable
  {
    /** The int this symbol embodies. */
    private final int val;
    /** A SingletonAlphabet containing only this symbol. */
    private final Alphabet matches;

    /**
     * @return always <code>Annotation.EMPTY_ANNOTATION</code>
     */
    public Annotation getAnnotation() {
      return Annotation.EMPTY_ANNOTATION;
    }

    /**
     * @return the decimal string form of the wrapped int
     */
    public String getName() {
      return val + "";
    }

    /**
     * @return the int this symbol embodies
     */
    public int intValue() {
      return val;
    }

    /**
     * @return the SingletonAlphabet built around this symbol
     */
    public Alphabet getMatches() {
      return matches;
    }

    /**
     * @return a single-element List containing just this symbol
     */
    public List getSymbols() {
      return new SingletonList(this);
    }

    /**
     * @return a singleton Set containing just this symbol
     */
    public Set getBases() {
      return Collections.singleton(this);
    }

    /**
     * Create a symbol for an int. Use
     * {@link IntegerAlphabet#getSymbol(int)} to obtain the canonical
     * instance for a value.
     *
     * @param val  the int to embody
     */
    protected IntegerSymbol(int val) {
      this.val = val;
      this.matches = new SingletonAlphabet(this);
    }

    /**
     * Hash code derived solely from {@link #intValue()}, consistent with
     * {@link #equals(Object)}.
     */
    public int hashCode(){
      int result = 17;
      result = 37*result+intValue();
      return result;
    }

    /**
     * Two IntegerSymbols are equal when they embody the same int.
     *
     * @param o  the object to compare with
     * @return true if o is an IntegerSymbol with the same int value
     */
    public boolean equals(Object o){
      if(o == this) return true;
      if(o instanceof IntegerSymbol){
        IntegerSymbol i = (IntegerSymbol) o;
        if (i.intValue() == this.intValue()) {
          return true;
        }
      }
      return false;
    }
  }

  /**
   * A light-weight implementation of SymbolList that allows an array to
   * appear to be a SymbolList.
   *
   * @author Matthew Pocock
   */
  private static class IntegerArray
  extends AbstractSymbolList implements Serializable {
    /** The viewed array; it is not copied. */
    private final int [] iArray;

    /**
     * @return the singleton IntegerAlphabet
     */
    public Alphabet getAlphabet() {
      // Go through getInstance() rather than reading INSTANCE directly: the
      // field is still null if nothing has asked for the singleton yet.
      return getInstance();
    }

    /**
     * Retrieve the symbol at a 1-based position.
     *
     * @param i  the position, from 1 to length() inclusive
     * @return the canonical IntegerSymbol for the array value at index i-1
     */
    public Symbol symbolAt(int i) {
      // Use the canonicalizing factory so that symbols read from this view are
      // the same objects as those handed out by IntegerAlphabet.getSymbol(int).
      return getInstance().getSymbol(iArray[i-1]);
    }

    /**
     * @return the length of the viewed array
     */
    public int length() {
      return iArray.length;
    }

    /**
     * @param iArray  the array to view; kept by reference
     */
    public IntegerArray(int [] iArray) {
      this.iArray = iArray;
    }
  }

  /**
   * A class to represent a finite contiguous subset of the infinite IntegerAlphabet
   *
   * @author Mark Schreiber
   * @author Matthew Pocock
   * @since 1.3
   */
  public static class SubIntegerAlphabet
  extends AbstractAlphabet {
    /** Smallest value in the alphabet (inclusive). */
    private final int min;
    /** Largest value in the alphabet (inclusive). */
    private final int max;
    private final String name; // cache this for performance

    /**
     * Construct a contiguous sub alphabet with the integers from min to max inclusive.
     *
     * @param min  the lower bound (inclusive)
     * @param max  the upper bound (inclusive)
     * @throws IllegalArgumentException if max &lt; min
     */
    private SubIntegerAlphabet(int min, int max) throws IllegalArgumentException{
      if(max < min) {
        throw new IllegalArgumentException(
          "min must be less than max: " +
          min + " : " + max
        );
      }

      this.min = min;
      this.max = max;

      this.name = "SUBINTEGER["+min+".."+max+"]";
    }

    /**
     * @return the name, of the form <code>SUBINTEGER[min..max]</code>
     */
    public String getName() {
      return name;
    }

    /**
     * @param sym  the symbol to test
     * @return true if sym is an IntegerSymbol whose value lies in
     *         [min, max]
     */
    protected boolean containsImpl(AtomicSymbol sym) {
      if(!IntegerAlphabet.getInstance().contains(sym)) {
        return false;
      }

      IntegerSymbol is = (IntegerSymbol) sym;
      return is.intValue() >= min && is.intValue() <= max;
    }

    /**
     * @param name Currently only "token" is supported. You may also
     * use "default" as a synonym of "token"
     * @return a new SubIntegerTokenization for this alphabet
     * @throws NoSuchElementException if name is anything other than "token" or
     *         "default" (including <code>null</code>)
     */
    public SymbolTokenization getTokenization(String name) {
      // Constant-first comparison so that a null name is reported as an
      // unsupported tokenization rather than as a NullPointerException.
      if("token".equals(name) || "default".equals(name)){
        return new SubIntegerTokenization(this);
      }else{
        throw new NoSuchElementException(name + " parser not supported by IntegerAlphabet yet");
      }
    }

    /**
     * Retrieve the canonical symbol for an int within this alphabet's range.
     *
     * @param val  the int to view
     * @return the IntegerSymbol supplied by the parent IntegerAlphabet
     * @throws IllegalSymbolException if val lies outside [min, max]
     */
    public IntegerSymbol getSymbol(int val)
    throws IllegalSymbolException {
      if(val < min || val > max) {
        throw new IllegalSymbolException(
          "Could not get Symbol for value " +
          val + " as it is not in the range " +
          min + " : " + max
        );
      }

      return IntegerAlphabet.getInstance().getSymbol(val);
    }

    /**
     * @return the number of integers from min to max inclusive. NOTE: this is
     *         computed in int arithmetic, so it wraps for ranges holding more
     *         than Integer.MAX_VALUE values.
     */
    public int size() {
      return max - min + 1;
    }

    /**
     * @return a single-element List containing just this alphabet
     */
    public List getAlphabets() {
      return new SingletonList(this);
    }


    /**
     * Resolve a one-element list of symbols to the symbol it contains.
     *
     * @param symL  a List that must hold exactly one AtomicSymbol
     * @return the single symbol, once validated against this alphabet
     * @throws IllegalSymbolException if symL does not hold exactly one
     *         element, or the element fails validation
     */
    protected AtomicSymbol getSymbolImpl(List symL) throws
        IllegalSymbolException {

      if (symL.size() != 1) {
        throw new IllegalSymbolException(
                "SubIntegerAlphabet is one-dimensional: " + this.getName() +
                " : " + symL);
      }

      AtomicSymbol s = (AtomicSymbol) symL.get(0);
      this.validate(s);
      return s;
    }

    /**
     * This alphabet is immutable.
     *
     * @throws ChangeVetoException always
     */
    protected void addSymbolImpl(AtomicSymbol sym)
    throws ChangeVetoException {
      throw new ChangeVetoException(
        "Can't add symbols to immutable alphabet " +
        getName()
      );
    }

    /**
     * This alphabet is immutable.
     *
     * @throws ChangeVetoException always
     */
    public void removeSymbol(Symbol sym)
    throws ChangeVetoException {
      throw new ChangeVetoException(
        "Can't remove symbols from immutable alphabet " +
        getName()
      );
    }

    /**
     * Iterate over the symbols of this alphabet in ascending order, from min
     * to max inclusive. The iterator does not support removal.
     *
     * @return an Iterator over IntegerSymbol objects
     */
    public Iterator iterator() {
      return new Iterator() {
        // Held as a long so that stepping past max == Integer.MAX_VALUE
        // cannot wrap around and make hasNext() true for ever.
        long indx = min;

        public boolean hasNext() {
          return indx <= max;
        }

        public Object next() {
          if (!hasNext()) {
            // Iterator contract: running off the end is a
            // NoSuchElementException, not an assertion failure.
            throw new NoSuchElementException(
              "No more symbols in " + getName()
            );
          }

          try {
            // Safe narrowing: hasNext() guarantees min <= indx <= max.
            Symbol sym = getSymbol((int) indx);
            indx++;
            return sym;
          } catch (IllegalSymbolException ise) {
            throw new BioError(
              "Assertion Failure: symbol " + indx +
              " produced by iterator but not found in " + getName()
              ,ise
            );
          }
        }

        public void remove() {
          throw new UnsupportedOperationException();
        }
      };
    }

    /**
     * @return always <code>Annotation.EMPTY_ANNOTATION</code>
     */
    public Annotation getAnnotation() {
      return Annotation.EMPTY_ANNOTATION;
    }
  }
}