001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public License. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022 023package org.biojava.bio.symbol; 024 025import java.io.Serializable; 026import java.util.Collections; 027import java.util.Iterator; 028import java.util.List; 029 030import org.biojava.bio.BioError; 031import org.biojava.bio.seq.io.SeqIOAdapter; 032import org.biojava.bio.seq.io.StreamParser; 033import org.biojava.bio.seq.io.SymbolTokenization; 034import org.biojava.utils.ChangeEvent; 035import org.biojava.utils.ChangeListener; 036import org.biojava.utils.ChangeSupport; 037import org.biojava.utils.ChangeVetoException; 038 039/** 040 * Basic implementation of SymbolList. This 041 * is currently backed by a normal Java array. 042 * <p> 043 * SimpleSymbolList is now editable. edit() has been implemented 044 * in a way that edits are relatively inefficient, but symbolAt() is 045 * very efficient. 046 * </p> 047 * <p> 048 * A new constructor SimpleSymbolList(SymbolParser,String) has 049 * been added so you can now simply turn a String into a SymbolList. 050 * This is mostly to provide a simple way to create a SymbolList for 051 * people just trying to get their feet wet. So here is an example. 052 * </p> 053 * <code> 054 * String seqString = "gaattc"; 055 * FiniteAlphabet dna = (FiniteAlphabet) AlphabetManager.alphabetForName("DNA"); 056 * SymbolParser parser = dna.getTokenization("token"); 057 * SymbolList mySl = new SimpleSymbolList (parser,seqString); 058 * System.out.println("Look at my sequence " + mySl.seqString()); 059 * </code> 060 * <p> 061 * with the right parser you should be able to make a protein sequence 062 * from the String "AspAlaValIleAsp" 063 * </p> 064 * <p> 065 * subList() is implemented such that subLists are views of the original until 066 * such time as the underlying SymbolList is edited in a way that would modify 067 * the subList, at which point the subList gets its own array of Symbols and 068 * does not reflect the edit to the original. When subList() is called on another 069 * subList (which is a veiw SimpleSymbolList) the new SimpleSymbolList is a view 070 * of the original, not the subList. 071 * </p> 072 * 073 * @author Thomas Down 074 * @author David Waring 075 * @author David Huen (another constructor) 076 * @author George Waldon 077 */ 078 079public class SimpleSymbolList extends AbstractSymbolList implements ChangeListener, Serializable { 080 private static final long serialVersionUID = -9015317520644706924L; 081 082 private static int instanceCount; 083 084 private static final int INCREMENT = 100; 085 086 private Alphabet alphabet; 087 private Symbol[] symbols; 088 private int length; 089 private boolean isView; // this is for subList which returns a view onto a SimpleSymbolList until either is edited 090 private int viewOffset; // offset of the veiw to the original 091 private SymbolList referenceSymbolList; // the original SymbolList subLists of views become sublists of original 092 093 private void addListener() { 094 incCount(); 095 alphabet.addChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS); 096 } 097 098 private static synchronized int incCount() { 099 return ++instanceCount; 100 } 101 102 protected void finalize() throws Throwable { 103 super.finalize(); 104 // System.err.println("Finalizing a SimpleSymbolList: " + decCount()); 105 alphabet.removeChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS); 106 if (isView){ 107 referenceSymbolList.removeChangeListener(this); 108 } 109 } 110 111 /** 112 * Construct an empty SimpleSymbolList. 113 * 114 * @param alpha The alphabet of legal symbols in this list. 115 */ 116 117 public SimpleSymbolList(Alphabet alpha) { 118 this.alphabet = alpha; 119 this.length = 0; 120 this.symbols = new Symbol[INCREMENT]; 121 this.isView = false; 122 this.viewOffset = 0; 123 addListener(); 124 } 125 126 /** 127 * Construct a SymbolList containing the symbols in the specified list. 128 * 129 * @param alpha The alphabet of legal symbols for this list. 130 * @param rList A Java List of symbols. 131 * 132 * @throws IllegalSymbolException if a Symbol is not in the specified alphabet. 133 * @throws ClassCastException if rList contains objects which do not implement Symbol. 134 */ 135 136 public SimpleSymbolList(Alphabet alpha, List rList) 137 throws IllegalSymbolException 138 { 139 this.alphabet = alpha; 140 this.length = rList.size(); 141 symbols = new Symbol[length]; 142 int pos = 0; 143 for (Iterator i = rList.iterator(); i.hasNext(); ) { 144 symbols[pos] = (Symbol) i.next(); 145 alphabet.validate(symbols[pos]); 146 pos++; 147 } 148 this.isView = false; 149 this.viewOffset = 0; 150 addListener(); 151 } 152 153 /** 154 * Construct a SymbolList from a string. 155 * 156 * @param parser A SymbolParser for whatever your string is -- e.g. alphabet.getParser("token"). 157 * @param seqString A Java List of symbols. 158 * 159 * @throws IllegalSymbolException if a Symbol is not in the specified alphabet. 160 */ 161 162 public SimpleSymbolList(SymbolTokenization parser, String seqString) 163 throws IllegalSymbolException 164 { 165 if (parser.getTokenType() == SymbolTokenization.CHARACTER) { 166 symbols = new Symbol[seqString.length()]; 167 } else { 168 symbols = new Symbol[INCREMENT]; 169 } 170 char[] charArray = new char[1024]; 171 int segLength = seqString.length(); 172 StreamParser stParser = parser.parseStream(new SSLIOListener()); 173 int charCount = 0; 174 int chunkLength; 175 while (charCount < segLength) { 176 chunkLength = Math.min(charArray.length, segLength - charCount); 177 seqString.getChars(charCount, charCount + chunkLength, charArray, 0); 178 stParser.characters(charArray, 0, chunkLength); 179 charCount += chunkLength; 180 } 181 stParser.close(); 182 183 this.alphabet = parser.getAlphabet(); 184 this.isView = false; 185 this.viewOffset = 0; 186 addListener(); 187 } 188 189 /** 190 * Construct a copy of an existing SymbolList. 191 * 192 * @param sl the list to copy. 193 */ 194 195 public SimpleSymbolList(SymbolList sl) { 196 this.alphabet = sl.getAlphabet(); 197 this.length = sl.length(); 198 symbols = new Symbol[length]; 199 for (int i = 0; i < length; ++i) { 200 symbols[i] = sl.symbolAt(i + 1); 201 } 202 this.isView = false; 203 this.viewOffset = 0; 204 addListener(); 205 } 206 207 /** 208 * Construct a SimpleSymbolList given the Symbol array that backs it. 209 * Used primarily with the chunked SymbolList builder but could be used 210 * elsewhere too. 211 */ 212 public SimpleSymbolList(Symbol [] symbols, int length, Alphabet alphabet) 213 { 214 this.symbols = symbols; 215 this.length = length; 216 this.alphabet = alphabet; 217 this.isView = false; 218 this.viewOffset = 0; 219 addListener(); 220 } 221 222 /** 223 * Construct construct a SimpleSymbolList that is a veiw of the original. 224 * this is used by subList(); 225 * 226 * @param orig -- the original SimpleSymbolList that this is a view of. 227 * @param start -- first base in new SymbolList 228 * @param end -- last base in new SymbolList 229 */ 230 231 private SimpleSymbolList(SimpleSymbolList orig, int start, int end) { 232 this.alphabet = orig.alphabet; 233 this.symbols = orig.symbols; 234 this.length = end - start + 1; 235 this.isView = true; 236 this.viewOffset = start -1; 237 this.referenceSymbolList = orig; 238 addListener(); 239 } 240 241 /** 242 * Get the alphabet of this SymbolList. 243 */ 244 245 public Alphabet getAlphabet() { 246 return alphabet; 247 } 248 249 /** 250 * Get the length of this SymbolList. 251 */ 252 253 public int length() { 254 return length; 255 } 256 257 /** 258 * Find a symbol at a specified offset in the SymbolList. 259 * 260 * @param pos Position in biological coordinates (1..length) 261 */ 262 263 public Symbol symbolAt(int pos) { 264// if (pos > length || pos < 1) { 265// throw new IndexOutOfBoundsException( 266// "Can't access " + pos + 267// " as it is not within 1.." + length 268// ); 269// } 270 // fixme: I have added this check back in a different way as the index 271 // system flips from arrays to symbols - we need this detailed debug 272 // messaging - anybody want to performance check with/without the try? 273 try { 274 return symbols[viewOffset + pos - 1]; 275 } catch (IndexOutOfBoundsException e) { 276 throw new IndexOutOfBoundsException( 277 "Index must be within [1.." + length() + "] : " + pos); 278 } 279 } 280 281 282 /** 283 * create a subList of the original, this will be a view until 284 * either the original symbolList or the sublist is edited 285 */ 286 287 public SymbolList subList(int start, int end){ 288 if (start < 1 || end > length()) { 289 throw new IndexOutOfBoundsException( 290 "Sublist index out of bounds " + length() + ":" + start + "," + end 291 ); 292 } 293 294 if (end < start) { 295 throw new IllegalArgumentException( 296 "end must not be lower than start: start=" + start + ", end=" + end 297 ); 298 } 299 300 SimpleSymbolList sl = new SimpleSymbolList(this,viewOffset+start,viewOffset+end); 301 if (isView){ 302 referenceSymbolList.addChangeListener(sl); 303 }else{ 304 this.addChangeListener(sl); 305 } 306 return sl; 307 } 308 /** 309 * Apply and edit to the SymbolList as specified by Edit. 310 * <p> 311 * edit() is now supported using the ChangeEvent system. SubLists do NOT reflect edits. 312 * </p> 313 */ 314 315 public synchronized void edit(Edit edit)throws IndexOutOfBoundsException, IllegalAlphabetException,ChangeVetoException { 316 ChangeSupport cs; 317 ChangeEvent cevt; 318 Symbol[] dest; 319 int newLength; 320 321 // first make sure that it is in bounds 322 if ((edit.pos + edit.length > length +1 ) || (edit.pos <= 0) || edit.length < 0){ 323 throw new IndexOutOfBoundsException(); 324 } 325 // make sure that the symbolList is of the correct alphabet 326 if (( edit.replacement.getAlphabet() != alphabet) && (edit.replacement != SymbolList.EMPTY_LIST)){ 327 throw new IllegalAlphabetException(); 328 } 329 330 // give the listeners a change to veto this 331 // create a new change event ->the EDIT is a static final variable of type ChangeType in SymbolList interface 332 cevt = new ChangeEvent(this, SymbolList.EDIT, edit); 333 cs = getChangeSupport(SymbolList.EDIT); 334 synchronized(cs) { 335 // let the listeners know what we want to do 336 cs.firePreChangeEvent(cevt); 337 338 // if nobody complained lets continue 339 // if we are a view we convert to a real SimpleSymbolList 340 if (isView){ 341 makeReal(); 342 } 343 // now for the edit 344 int posRightFragInSourceArray = edit.pos + edit.length - 1; 345 int rightFragLength = length - posRightFragInSourceArray; 346 int posRightFragInDestArray = posRightFragInSourceArray + edit.replacement.length() - edit.length; 347 int posReplaceFragInDestArray = edit.pos - 1; 348 int replaceFragLength = edit.replacement.length(); 349 int totalLength = length + replaceFragLength - edit.length + INCREMENT; // What is this increment for? 350 351 // extend the array 352 dest = new Symbol[totalLength]; 353 // copy symbols before the edit and make sure we are not editing the edit at the same time (hoops!) 354 System.arraycopy(symbols,0,dest,0,(edit.pos -1)); 355 356 // copy the symbols after the edit 357 if (rightFragLength > 0){ 358 System.arraycopy(symbols, posRightFragInSourceArray, dest, posRightFragInDestArray,rightFragLength); 359 } 360 // copy the symbols within the edit 361 for (int i = 1; i <= replaceFragLength; i++){ 362 dest[posReplaceFragInDestArray + i - 1] = edit.replacement.symbolAt(i); 363 } 364 365 // if there was a net deletion we have to get rid of the remaining symbols 366 newLength = length + replaceFragLength - edit.length; 367 for (int j = newLength; j < totalLength; j++){ 368 dest[j] = null; 369 } 370 length = newLength; 371 symbols = dest; 372 cs.firePostChangeEvent(cevt); 373 } 374 } 375 376 /** 377 * On preChange() we convert the SymolList to a non-veiw version, giving it its own copy of symbols 378 */ 379 380 public void preChange(ChangeEvent cev) throws ChangeVetoException{ 381 382 // lets not bother making any changes if the edit would not effect us or our children 383 Object change = cev.getChange(); 384 if( (change != null) && (change instanceof Edit) ) { 385 Edit e = (Edit)change; 386 if (e.pos > (viewOffset + length)){ 387 return; 388 } 389 if ((e.pos < viewOffset) && (e.length - e.replacement.length() == 0)){ 390 return; 391 } 392 393 // subLists of views are listeners to the original so we don't have to forward the message 394 makeReal(); 395 } 396 } 397 398 // we don't do anything on the postChange we don't want to reflect the changes 399 public void postChange(ChangeEvent cev){ 400 } 401 402 /** 403 * Converts a view symbolList to a real one 404 * that means it gets its own copy of the symbols array 405 */ 406 private void makeReal(){ 407 if(isView){ 408 Symbol[] newSymbols = new Symbol[length]; 409 System.arraycopy (symbols,viewOffset,newSymbols, 0, length); 410 this.symbols = newSymbols; 411 this.isView = false; 412 this.viewOffset = 0; 413 referenceSymbolList.removeChangeListener(this); 414 referenceSymbolList = null; 415 } 416 } 417 418 419 /** 420 * Add a new Symbol to the end of this list. 421 * 422 * @param sym Symbol to add 423 * @throws IllegalSymbolException if the Symbol is not in this list's alphabet 424 */ 425 426 public void addSymbol(Symbol sym) 427 throws IllegalSymbolException, ChangeVetoException 428 { 429 try { 430 SymbolList extraSymbol = new SimpleSymbolList(getAlphabet(), Collections.nCopies(1, sym)); 431 edit(new Edit(length() + 1, 0, extraSymbol)); 432 } catch (IllegalAlphabetException ex) { 433 throw new IllegalSymbolException(ex, sym, "Couldn't add symbol"); 434 } catch (IndexOutOfBoundsException ex) { 435 throw new BioError("Assertion failure: couldn't add symbol at end of list"); 436 } 437 } 438 439 /** 440 * Return the Java Symbol[] array that backs this object. 441 * primarily used to accelerate reconstruction of symbol lists 442 * in the packed chunked symbol list implementation. 443 */ 444 public Symbol [] getSymbolArray() 445 { 446 return symbols; 447 } 448 449 /** 450 * Simple inner class for channelling sequence notifications from 451 * a StreamParser. 452 */ 453 454 private class SSLIOListener extends SeqIOAdapter { 455 public void addSymbols(Alphabet alpha,Symbol[] syms,int start, int length){ 456 if(symbols.length < SimpleSymbolList.this.length + length) { 457 Symbol[] dest; 458 dest = new Symbol [((int) (1.5 * SimpleSymbolList.this.length)) + length]; 459 System.arraycopy(symbols, 0, dest, 0, SimpleSymbolList.this.length); 460 System.arraycopy(syms, start, dest, SimpleSymbolList.this.length, length); 461 symbols = dest; 462 }else{ 463 System.arraycopy(syms, start, symbols, SimpleSymbolList.this.length, length); 464 } 465 466 SimpleSymbolList.this.length += length; 467 } 468 } 469}