001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.impl;
023
024import java.util.ArrayList;
025import java.util.List;
026import java.util.Set;
027import java.util.SortedMap;
028import java.util.TreeMap;
029import java.util.TreeSet;
030
031import org.biojava.bio.seq.ComponentFeature;
032import org.biojava.bio.seq.DNATools;
033import org.biojava.bio.seq.StrandedFeature;
034import org.biojava.bio.symbol.AbstractSymbolList;
035import org.biojava.bio.symbol.Alphabet;
036import org.biojava.bio.symbol.IllegalSymbolException;
037import org.biojava.bio.symbol.Location;
038import org.biojava.bio.symbol.RangeLocation;
039import org.biojava.bio.symbol.Symbol;
040import org.biojava.bio.symbol.SymbolList;
041
042/**
043 * Support class for applications which need to patch together sections
044 * of sequence into a single SymbolList.  This class isn't intended
045 * for direct use in user code -- instead, it is a helper for people
046 * implementing the full BioJava assembly model.  See SimpleAssembly
047 * for an example.
048 *
049 * @author Thomas Down
050 * @author Greg Cox
051 * @author Matthew Pocock
052 * @author David Huen (support for overlapped features)
053 * @since 1.1
054 */
055
056public class NewAssembledSymbolList extends AbstractSymbolList {
057    private boolean autoLength = true;
058    private int length = 0;
059
060    private SortedMap components; // maps between locations and the actual components
061    private List componentList; // an ordered list of component locations in assembly space
062
063    private final Symbol noninformativeSymbol = DNATools.n();
064
065
066    private class TranslatedSymbolList extends AbstractSymbolList
067    {
068        ComponentFeature cf;
069        int translation;
070        int length;
071        SymbolList underlyingSymList;
072
073        private TranslatedSymbolList(ComponentFeature cf)
074//            throws IllegalAlphabetException
075        {
076//            if (cf.getComponentSequence().getAlphabet() != DNATools.getDNA()) 
077//                throw new IllegalAlphabetException("TranslatedSymbolList only accepts DNA.");
078
079            this.cf = cf;
080
081            if (cf.getStrand() == StrandedFeature.POSITIVE) {
082                translation = cf.getLocation().getMin() - cf.getComponentLocation().getMin();
083            }
084            else {
085                translation = cf.getLocation().getMax() + cf.getComponentLocation().getMin();
086            }
087
088            // cache these for speed
089            underlyingSymList = cf.getComponentSequence();
090            length = underlyingSymList.length();
091        }
092
093        /**
094         * this is the only method that needs implementation. (need to reverse complement?)
095         */
096        public Symbol symbolAt(int i)
097        {
098            // compute index
099            int idx;
100
101            try {
102                if (cf.getStrand() == StrandedFeature.POSITIVE) {
103                    idx = i - translation;
104                    if ((idx<1) || (idx>length)) return noninformativeSymbol;
105//                    System.out.println("##########+++ " + i + " " + idx);
106                    return underlyingSymList.symbolAt(idx);                
107                }
108                else {
109                    idx = translation - i;
110                    if ((idx<1) || (idx>length)) return noninformativeSymbol;
111//                    System.out.println("##########--- " + i + " " + idx);
112                    return DNATools.complement(underlyingSymList.symbolAt(idx));
113                }
114            }
115            catch (IllegalSymbolException ise) {
116                return noninformativeSymbol;
117            }
118        }
119
120
121        /**
122         * should I bother with this at all?
123         */
124        public Alphabet getAlphabet() { return DNATools.getDNA(); }
125
126        /**
127         * this one has to be faked to see much longer
128         * than it is so the translated locations appear valid.
129         */
130        public int length() { return cf.getLocation().getMax(); }
131
132        public ComponentFeature getFeature() { return cf; }
133    }
134
135
136    {
137        components = new TreeMap(Location.naturalOrder);
138        componentList = new ArrayList();
139    }
140
141    public void setLength(int len) {
142        autoLength = false;
143        length = len;
144    }
145
146    public void putComponent(ComponentFeature f) {
147       // when receiving a new entry there are
148       // two passes.
149
150       // in pass one, we remove any redundant entries from the
151       // existing components map and modify the new entry to deal
152       // with existing entries.
153
154       // in pass two, we recreate componentList from the comonents
155       // key.
156
157       // components has as key the assembly sequence ranges
158       // that each component maps.  The value is the component
159       // SymbolList itself.
160
161       // componentList stores the ranges as Locations to accelerate
162       // searches for the current component.
163
164       // PASS 1
165       // ======
166       // when entering a new component, the following checks
167       // are needed:-
168       // 1) if the new range is completely contained by
169       //    a current range, discard it and its component.
170       // 2) if the new range completely contains a current
171       //    range, discard the current range and its component.
172       // 3) if the new range overlaps an existing range, adjust
173       //    the new range to remove the overlap.
174
175
176       // find the first range  that contains/right of the
177       // left limit of the current range
178       Location newLoc = new RangeLocation(f.getLocation().getMin(), f.getLocation().getMax());
179       int startIdx = idxRightOfPoint(f.getLocation().getMin());
180
181//       System.out.println("testing " + f.getLocation() + " result is " + startIdx);
182
183       if (startIdx == NOTFOUND) {
184           // either there are no entries yet or that all of 
185           // them are to left of this point.
186           // in this case, we just insert the new entry
187//           System.out.println("putting " + f.getLocation() + " " + f.getLocation() + " " + f.getStrand() + " " + f.getComponentSequence().seqString());
188           components.put(f.getLocation(), new TranslatedSymbolList(f));
189           recreateList();
190           return;
191       }
192       else {
193           // we are going to reconstruct the mappings
194           // according to above rules
195
196           for (int i=startIdx;
197               i < componentList.size(); // check for end of componentList
198               i++) {
199
200               Location thisLoc = (Location) componentList.get(i);
201//               System.out.println("comparing " + newLoc + " against " + thisLoc);
202
203               // check if there are any other relevant entries
204               if (newLoc.getMin() > thisLoc.getMax()) break;
205
206               if (newLoc.contains(thisLoc)) {
207                   // eliminate this entry from both componentList
208                   // and from the components
209                   // old mapping can also be discarded
210                   components.remove(thisLoc);
211               }
212               else if (thisLoc.contains(newLoc)) {
213                   // throw away new entry
214                   return;
215               }
216               else if (newLoc.overlaps(thisLoc)) {
217                   // resolve the overlap
218                   if (newLoc.getMin() < thisLoc.getMin()) {
219                       // newLoc on left: correct right limit
220                       newLoc = new RangeLocation(newLoc.getMin(), thisLoc.getMin() - 1);
221                   }
222                   else {
223                       // right overlap
224                       newLoc = new RangeLocation(thisLoc.getMax() + 1, newLoc.getMax());
225                   }
226               }
227               else {
228                   // no overlap of any kind at all
229                   // this can only mean we can slot in the entry here
230//                   System.out.println("putting " + newLoc + " " + f.getLocation() + " " + f.getStrand() + " " + f.getComponentSequence().seqString());
231                   components.put(newLoc, new TranslatedSymbolList(f));
232                   recreateList();
233                   return;
234               }
235           }
236
237           // you can get here if you have "consumed" the last location
238           // or overlapped it.  It only remains to put in the new
239           // entry.  In this case, it is either going to be last
240           // (because you have consumed the last, or second last
241           // (because you have overlapped the last).
242//           System.out.println("putting " + newLoc + " " + f.getLocation() + " " + f.getStrand() + " " + f.getComponentSequence().seqString());
243           components.put(newLoc, new TranslatedSymbolList(f));
244           recreateList();
245           return;
246       }
247
248    }
249
250    private void recreateList()
251    {
252        componentList.clear();
253        componentList.addAll(components.keySet());
254    }
255
256    public void removeComponent(ComponentFeature f) {
257        // search thru current components to
258        // remove the specified component
259        for (int i=0; i< componentList.size(); i++) {
260            Location loc = (Location) componentList.get(i);
261            TranslatedSymbolList sl = (TranslatedSymbolList) components.get(loc);
262
263            if (sl.getFeature() != f) continue;
264
265            // have a match, remove from both componentList
266            // and component
267            // loc is now the mapping range of the to-be-removed feature
268            componentList.remove(i);
269
270            // at this point the former left and right neighbours of the
271            // removed feature are i-1 and i.
272
273            // check bounds of both neighbouring locations
274            // and extend if possible to cover the void
275            // left by the removal
276            // extension would not occur if the neighbours
277            // are not abutting to begin with
278            int minExtensionFromRight = loc.getMin();
279
280            // the way in which the locations are assembled
281            // mean that at most, the left and right neighbours can be extended
282            // to some point within the removed range.
283            if (i !=0 ) {
284                // there was a left neighbour, extend it if poss
285                Location leftLoc = (Location) componentList.get(i-1);
286                Location leftCFLoc = (Location) components.get(leftLoc);
287
288                if (leftCFLoc.getMax() > loc.getMin()) {
289                    // there is scope for extension
290                    // compute the right limit
291                    int newRightLimitOnLeftRange = Math.min(leftCFLoc.getMax(), loc.getMax());
292
293                    Location newLeftLoc = new RangeLocation(
294                        leftLoc.getMin(),
295                        newRightLimitOnLeftRange
296                        );
297
298                    // update all tables
299                    componentList.remove(i-1);
300                    componentList.add(i-1, newLeftLoc);
301                    Object value = components.remove(leftLoc);
302                    components.put(newLeftLoc, value);
303
304                    minExtensionFromRight = newRightLimitOnLeftRange + 1;
305                }
306            }
307
308            if (i != componentList.size()) { // we use i because that's the index of the right neighbour after deletion
309                // there was a right neighbour
310                Location rightLoc = (Location) componentList.get(i);
311                Location rightCFLoc = (Location) components.get(rightLoc);
312
313                if ((loc.getMax() > rightCFLoc.getMin()) && // did the removed range limit the right neighbour?
314                    (minExtensionFromRight < rightLoc.getMin()) ) { // is there any potential extension?
315                    
316                    // there is potential for extension
317                    int newLeftLimitOnRightRange = Math.max(minExtensionFromRight, rightCFLoc.getMin());
318
319                    Location newRightLoc = new RangeLocation(
320                        newLeftLimitOnRightRange,
321                        rightLoc.getMax()
322                        );
323
324                    // update all tables
325                    componentList.remove(i);
326                    componentList.add(i, newRightLoc);
327                    Object value = components.remove(rightLoc);
328                    components.put(newRightLoc, value);
329
330                }
331            }
332        }
333    }
334
335    private SymbolList getComponentSymbols(Location loc) {
336        // the only kind of object in this class is a TranslatedSymbolList
337        // this already handles the coordinate transformations
338        // I only need to select the right one
339        return (SymbolList) components.get(loc);
340    }
341
342
343    public Set getComponentLocationSet() {
344//      return components.keySet();
345        TreeSet newSet = new TreeSet(Location.naturalOrder);
346        newSet.addAll(componentList);
347        return newSet;
348    }
349
350    /**
351     * Find the location containing p in a sorted list of non-overlapping contiguous
352     * locations.
353     */
354
355    private Location lastLocation = Location.empty;
356
357    // searches for first range that contains the point.
358    // returns null on failure
359    private Location locationOfPoint(int p) {
360        if (lastLocation.contains(p)) {
361            return lastLocation;
362        }
363
364        int first = 0;
365        int last = componentList.size() - 1;
366
367        while (first <= last) {
368            int check = (first + last) / 2;
369            Location checkL = (Location) componentList.get(check);
370            if (checkL.contains(p)) {
371                lastLocation = checkL;
372                return checkL;
373            }
374
375            if (p < checkL.getMin()) {
376                last = check - 1;
377            } else {
378                first = check + 1;
379            }
380        }
381
382        return null;
383    }
384
385    private int NOTFOUND = -1;
386
387    private int idxRightOfPoint(int p)
388    {
389
390        int first = 0;
391        int last = componentList.size() - 1;
392
393        int check = 0;
394        Location checkL = null;
395        while (first <= last) {
396            check = (first + last) / 2;
397            checkL = (Location) componentList.get(check);
398            if (checkL.contains(p)) {
399//                System.out.println("idxRightOfPoint("+p+") returning " + check + " on " + checkL);
400                return check;
401            }
402
403            if (p < checkL.getMin())
404                last = check - 1;
405            else
406                first = check + 1;
407        }
408
409        try {
410            if (p < checkL.getMin()) {
411                return check;
412            } else {
413                int nextIdx = check+1;
414                if ((nextIdx < componentList.size())
415                    && (p < ((Location) componentList.get(nextIdx)).getMin()) )
416                    return nextIdx;
417                else
418                    return NOTFOUND;
419            }
420        }
421        catch (NullPointerException npe) {
422            // this deals with the start
423            // when there are no entries
424            return NOTFOUND;
425        }
426    }
427
428    public Alphabet getAlphabet() {
429        return DNATools.getDNA();
430    }
431
432    public int length() {
433      if(autoLength) {
434        int componentCount = componentList.size();
435
436        if (componentCount == 0)
437          // there's nothing in 'ere.
438          return 0;
439        else {
440          Location last = (Location) componentList.get(componentCount - 1);
441          return last.getMax();
442        }
443      } else {
444        return length;
445      }
446    }
447
448  public Symbol symbolAt(int pos) {
449      Location l = locationOfPoint(pos);
450
451      if (l == null) {
452//          System.out.println("symbol at " + pos + " is n");
453          return noninformativeSymbol;
454      }
455
456      SymbolList syms = getComponentSymbols(l);
457
458      try {
459//          System.out.println("symbol at " + pos + " is " + DNATools.getDNA().getTokenization("token").tokenizeSymbol(syms.symbolAt(pos)) 
460//          + " location is " + l + " symbolList is " + syms);
461      }
462      catch (Exception be) { be.printStackTrace(); }
463      return syms.symbolAt(pos);
464  }
465
466  public SymbolList subList(int start, int end) 
467  {
468    Location l = locationOfPoint(start);
469    if (l != null && l.contains(end)) {
470      SymbolList symbols = getComponentSymbols(l);
471//      int tstart = start - l.getMin() + 1;
472//      int tend = end - l.getMin() + 1;
473      return symbols.subList(start, end);
474    }
475
476    // All is lost.  Fall back onto `view' subList from AbstractSymbolList
477
478    return super.subList(start, end);
479  }
480/*
481  public Symbol symbolAt(int pos) {
482      // System.out.println(this + "  symbolAt(" + pos + ")");
483    Location l = locationOfPoint(pos);
484    if (l != null) {
485      SymbolList syms = getComponentSymbols(l);
486      return syms.symbolAt(pos - l.getMin() + 1);
487    }
488
489    return noninformativeSymbol;
490  }
491
492
493  public SymbolList subList(int start, int end) {
494      // System.out.println(this + "  subList(" + start + ", " + end + ")");
495    Location l = locationOfPoint(start);
496    if (l != null && l.contains(end)) {
497      SymbolList symbols = getComponentSymbols(l);
498      int tstart = start - l.getMin() + 1;
499      int tend = end - l.getMin() + 1;
500      return symbols.subList(tstart, tend);
501    }
502
503    // All is lost.  Fall back onto `view' subList from AbstractSymbolList
504
505    return super.subList(start, end);
506  }
507*/
508}