001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public License.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.symbol;
024
025import java.io.Serializable;
026import java.util.Collections;
027import java.util.Iterator;
028import java.util.List;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.seq.io.SeqIOAdapter;
032import org.biojava.bio.seq.io.StreamParser;
033import org.biojava.bio.seq.io.SymbolTokenization;
034import org.biojava.utils.ChangeEvent;
035import org.biojava.utils.ChangeListener;
036import org.biojava.utils.ChangeSupport;
037import org.biojava.utils.ChangeVetoException;
038
/**
 * Basic implementation of SymbolList.  This
 * is currently backed by a normal Java array.
 * <p>
 * SimpleSymbolList is editable. edit() has been implemented
 * in a way that edits are relatively inefficient, but symbolAt() is
 * very efficient.
 * </p>
 * <p>
 * The constructor SimpleSymbolList(SymbolTokenization, String) lets you
 * simply turn a String into a SymbolList.
 * This is mostly to provide a simple way to create a SymbolList for
 * people just trying to get their feet wet. So here is an example.
 * </p>
 * <pre>
 * String seqString = "gaattc";
 * FiniteAlphabet dna = (FiniteAlphabet) AlphabetManager.alphabetForName("DNA");
 * SymbolTokenization parser = dna.getTokenization("token");
 * SymbolList mySl = new SimpleSymbolList(parser, seqString);
 * System.out.println("Look at my sequence " + mySl.seqString());
 * </pre>
 * <p>
 * With the right parser you should be able to make a protein sequence
 * from the String "AspAlaValIleAsp".
 * </p>
 * <p>
 * subList() is implemented such that subLists are views of the original until
 * such time as the underlying SymbolList is edited in a way that would modify
 * the subList, at which point the subList gets its own array of Symbols and
 * does not reflect the edit to the original. When subList() is called on another
 * subList (which is a view SimpleSymbolList) the new SimpleSymbolList is a view
 * of the original, not of the subList.
 * </p>
 *
 * @author Thomas Down
 * @author David Waring
 * @author David Huen (another constructor)
 * @author George Waldon
 */
078
079public class SimpleSymbolList extends AbstractSymbolList implements ChangeListener, Serializable {
080    private static final long serialVersionUID = -9015317520644706924L;
081        
082    private static int instanceCount;
083
084    private static final int INCREMENT = 100;
085
086    private Alphabet alphabet;
087    private Symbol[] symbols;
088    private int length;
089    private boolean isView;  // this is for subList which returns a view onto a SimpleSymbolList until either is edited
090    private int viewOffset;  // offset of the veiw to the original
091    private SymbolList referenceSymbolList; // the original SymbolList subLists of views become sublists of original
092
093    private void addListener() {
094        incCount();
095        alphabet.addChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
096    }
097
098    private static synchronized int incCount() {
099        return ++instanceCount;
100    }
101
102    protected void finalize() throws Throwable {
103        super.finalize();
104        // System.err.println("Finalizing a SimpleSymbolList: " + decCount());
105        alphabet.removeChangeListener(ChangeListener.ALWAYS_VETO, Alphabet.SYMBOLS);
106        if (isView){
107            referenceSymbolList.removeChangeListener(this);
108        }
109    }
110
111    /**
112     * Construct an empty SimpleSymbolList.
113     *
114     * @param alpha The alphabet of legal symbols in this list.
115     */
116
117    public SimpleSymbolList(Alphabet alpha) {
118        this.alphabet = alpha;
119        this.length = 0;
120        this.symbols = new Symbol[INCREMENT];
121        this.isView = false;
122        this.viewOffset = 0;
123        addListener();
124    }
125
126    /**
127     * Construct a SymbolList containing the symbols in the specified list.
128     *
129     * @param alpha The alphabet of legal symbols for this list.
130     * @param rList A Java List of symbols.
131     *
132     * @throws IllegalSymbolException if a Symbol is not in the specified alphabet.
133     * @throws ClassCastException if rList contains objects which do not implement Symbol.
134     */
135
136    public SimpleSymbolList(Alphabet alpha, List rList)
137        throws IllegalSymbolException
138    {
139        this.alphabet = alpha;
140        this.length = rList.size();
141        symbols = new Symbol[length];
142        int pos = 0;
143        for (Iterator i = rList.iterator(); i.hasNext(); ) {
144            symbols[pos] = (Symbol) i.next();
145            alphabet.validate(symbols[pos]);
146            pos++;
147        }
148        this.isView = false;
149        this.viewOffset = 0;
150        addListener();
151    }
152
153    /**
154     * Construct a SymbolList from a string.
155     *
156     * @param parser A SymbolParser for whatever your string is -- e.g. alphabet.getParser("token").
157     * @param seqString A Java List of symbols.
158     *
159     * @throws IllegalSymbolException if a Symbol is not in the specified alphabet.
160     */
161
162    public SimpleSymbolList(SymbolTokenization parser, String seqString)
163        throws IllegalSymbolException
164    {
165        if (parser.getTokenType() == SymbolTokenization.CHARACTER) {
166            symbols = new Symbol[seqString.length()];
167        } else {
168            symbols = new Symbol[INCREMENT];
169        }
170        char[] charArray = new char[1024];
171        int segLength = seqString.length();
172        StreamParser stParser = parser.parseStream(new SSLIOListener());
173        int charCount = 0;
174        int chunkLength;
175        while (charCount < segLength) {
176            chunkLength = Math.min(charArray.length, segLength - charCount);
177            seqString.getChars(charCount, charCount + chunkLength, charArray, 0);
178            stParser.characters(charArray, 0, chunkLength);
179            charCount += chunkLength;
180        }
181        stParser.close();
182
183        this.alphabet = parser.getAlphabet();
184        this.isView = false;
185        this.viewOffset = 0;
186        addListener();
187    }
188
189    /**
190     * Construct a copy of an existing SymbolList.
191     *
192     * @param sl the list to copy.
193     */
194
195    public SimpleSymbolList(SymbolList sl) {
196        this.alphabet = sl.getAlphabet();
197        this.length = sl.length();
198        symbols = new Symbol[length];
199        for (int i = 0; i < length; ++i) {
200            symbols[i] = sl.symbolAt(i + 1);
201        }
202        this.isView = false;
203        this.viewOffset = 0;
204        addListener();
205    }
206
207    /**
208     * Construct a SimpleSymbolList given the Symbol array that backs it.
209     * Used primarily with the chunked SymbolList builder but could be used
210     * elsewhere too.
211     */
212    public SimpleSymbolList(Symbol [] symbols, int length, Alphabet alphabet)
213    {
214        this.symbols = symbols;
215        this.length = length;
216        this.alphabet = alphabet;
217        this.isView = false;
218        this.viewOffset = 0;
219        addListener();
220    }
221
222    /**
223     * Construct construct a SimpleSymbolList that is a veiw of the original.
224     *    this is used by subList();
225     *
226     * @param orig -- the original SimpleSymbolList that this is a view of.
227     * @param start -- first base in new SymbolList
228     * @param end -- last base in new SymbolList
229     */
230
231    private SimpleSymbolList(SimpleSymbolList orig, int start, int end) {
232        this.alphabet = orig.alphabet;
233        this.symbols = orig.symbols;
234        this.length = end - start + 1;
235        this.isView = true;
236        this.viewOffset = start -1;
237        this.referenceSymbolList = orig;
238        addListener();
239    }
240
241    /**
242     * Get the alphabet of this SymbolList.
243     */
244
245    public Alphabet getAlphabet() {
246      return alphabet;
247    }
248
249    /**
250     * Get the length of this SymbolList.
251     */
252
253    public int length() {
254      return length;
255    }
256
257    /**
258     * Find a symbol at a specified offset in the SymbolList.
259     *
260     * @param pos Position in biological coordinates (1..length)
261     */
262
263    public Symbol symbolAt(int pos) {
264//        if (pos > length || pos < 1) {
265//          throw new IndexOutOfBoundsException(
266//            "Can't access " + pos +
267//            " as it is not within 1.." + length
268//          );
269//        }
270      // fixme: I have added this check back in a different way as the index
271      // system flips from arrays to symbols - we need this detailed debug
272      // messaging - anybody want to performance check with/without the try?
273      try {
274        return symbols[viewOffset + pos - 1];
275      } catch (IndexOutOfBoundsException e) {
276        throw new IndexOutOfBoundsException(
277                "Index must be within [1.." + length() + "] : " + pos);
278      }
279    }
280
281
282    /**
283    * create a subList of the original, this will be a view until
284    * either the original symbolList or the sublist is edited
285    */
286
287    public SymbolList subList(int start, int end){
288        if (start < 1 || end > length()) {
289            throw new IndexOutOfBoundsException(
290                      "Sublist index out of bounds " + length() + ":" + start + "," + end
291                      );
292        }
293
294        if (end < start) {
295            throw new IllegalArgumentException(
296                "end must not be lower than start: start=" + start + ", end=" + end
297                );
298        }
299
300        SimpleSymbolList sl = new SimpleSymbolList(this,viewOffset+start,viewOffset+end);
301        if (isView){
302            referenceSymbolList.addChangeListener(sl);
303        }else{
304            this.addChangeListener(sl);
305        }
306        return sl;
307    }
308    /**
309    * Apply and edit to the SymbolList as specified by Edit.
310    * <p>
311    *   edit() is now supported using the ChangeEvent system. SubLists do NOT reflect edits.
312    * </p>
313    */
314
315    public synchronized void edit(Edit edit)throws IndexOutOfBoundsException, IllegalAlphabetException,ChangeVetoException {
316        ChangeSupport cs;
317        ChangeEvent cevt;
318        Symbol[] dest;
319        int newLength;
320
321        // first make sure that it is in bounds
322        if ((edit.pos + edit.length > length +1 ) || (edit.pos <= 0) || edit.length < 0){
323            throw new IndexOutOfBoundsException();
324        }
325        // make sure that the symbolList is of the correct alphabet
326        if (( edit.replacement.getAlphabet() != alphabet) &&  (edit.replacement != SymbolList.EMPTY_LIST)){
327            throw new IllegalAlphabetException();
328        }
329
330        // give the listeners a change to veto this
331         // create a new change event ->the EDIT is a static final variable of type ChangeType in SymbolList interface
332        cevt = new ChangeEvent(this, SymbolList.EDIT, edit);
333        cs = getChangeSupport(SymbolList.EDIT);
334        synchronized(cs) {
335            // let the listeners know what we want to do
336            cs.firePreChangeEvent(cevt);
337
338            // if nobody complained lets continue
339            // if we are a view we convert to a real SimpleSymbolList
340            if (isView){
341                makeReal();
342            }
343            // now for the edit
344            int posRightFragInSourceArray5 = edit.pos + edit.length - 1;
345            int rightFragLength = length - posRightFragInSourceArray5;
346            int posRightFragInDestArray5 = posRightFragInSourceArray5 + edit.replacement.length() - edit.length;
347            int posReplaceFragInDestArray5 = edit.pos - 1;
348            int replaceFragLength = edit.replacement.length();
349            int totalLength = length + replaceFragLength - edit.length + INCREMENT; // What is this increment for?
350
351            // extend the array
352            dest = new Symbol[totalLength];
353            // copy symbols before the edit and make sure we are not editing the edit at the same time (hoops!)
354            System.arraycopy(symbols,0,dest,0,(edit.pos -1));
355
356            // copy the symbols after the edit
357            if (rightFragLength > 0){
358                System.arraycopy(symbols, posRightFragInSourceArray5, dest, posRightFragInDestArray5,rightFragLength);
359            }
360            // copy the symbols within the edit
361            for (int i = 1; i <= replaceFragLength; i++){
362                dest[posReplaceFragInDestArray5 + i - 1] = edit.replacement.symbolAt(i);
363            }
364
365            // if there was a net deletion we have to get rid of the remaining symbols
366            newLength = length + replaceFragLength - edit.length;
367            for (int j = newLength; j < totalLength; j++){
368                dest[j] = null;
369            }
370            length = newLength;
371            symbols = dest;
372            cs.firePostChangeEvent(cevt);
373        }
374    }
375
376    /**
377    *  On preChange() we convert the SymolList to a non-veiw version, giving it its own copy of symbols
378    */
379
380    public void preChange(ChangeEvent cev) throws ChangeVetoException{
381
382        // lets not bother making any changes if the edit would not effect us or our children
383        Object change = cev.getChange();
384        if( (change != null) && (change instanceof Edit) ) {
385            Edit e = (Edit)change;
386            if (e.pos > (viewOffset + length)){
387                return;
388            }
389            if ((e.pos < viewOffset) && (e.length - e.replacement.length() == 0)){
390                return;
391            }
392
393        // subLists of views are listeners to the original so we don't have to forward the message
394        makeReal();
395        }
396    }
397
398    // we don't do anything on the postChange we don't want to reflect the changes
399    public void postChange(ChangeEvent cev){
400    }
401
402    /**
403    *  Converts a view symbolList to a real one
404    *  that means it gets its own copy of the symbols array
405    */
406    private void makeReal(){
407        if(isView){
408            Symbol[] newSymbols = new Symbol[length];
409            System.arraycopy (symbols,viewOffset,newSymbols, 0, length);
410            this.symbols = newSymbols;
411            this.isView = false;
412            this.viewOffset = 0;
413            referenceSymbolList.removeChangeListener(this);
414            referenceSymbolList = null;
415        }
416    }
417
418
419    /**
420     * Add a new Symbol to the end of this list.
421     *
422     * @param sym Symbol to add
423     * @throws IllegalSymbolException if the Symbol is not in this list's alphabet
424     */
425
426      public void addSymbol(Symbol sym)
427          throws IllegalSymbolException, ChangeVetoException
428      {
429          try {
430              SymbolList extraSymbol = new SimpleSymbolList(getAlphabet(), Collections.nCopies(1, sym));
431              edit(new Edit(length() + 1, 0, extraSymbol));
432          } catch (IllegalAlphabetException ex) {
433              throw new IllegalSymbolException(ex, sym, "Couldn't add symbol");
434          } catch (IndexOutOfBoundsException ex) {
435              throw new BioError("Assertion failure: couldn't add symbol at end of list");
436          }
437      }
438
439    /**
440     * Return the Java Symbol[] array that backs this object.
441     * primarily used to accelerate reconstruction of symbol lists
442     * in the packed chunked symbol list implementation.
443     */
444    public Symbol [] getSymbolArray()
445    {
446        return symbols;
447    }
448
449    /**
450     * Simple inner class for channelling sequence notifications from
451     * a StreamParser.
452     */
453
454    private class SSLIOListener extends SeqIOAdapter {
455        public void addSymbols(Alphabet alpha,Symbol[] syms,int start, int length){
456            if(symbols.length < SimpleSymbolList.this.length + length) {
457                Symbol[] dest;
458                dest = new Symbol [((int) (1.5 * SimpleSymbolList.this.length)) + length];
459                System.arraycopy(symbols, 0, dest, 0, SimpleSymbolList.this.length);
460                System.arraycopy(syms, start, dest, SimpleSymbolList.this.length, length);
461                symbols = dest;
462            }else{
463                System.arraycopy(syms, start, symbols, SimpleSymbolList.this.length, length);
464            }
465
466            SimpleSymbolList.this.length += length;
467        }
468    }
469}