001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.impl;
023
024import java.util.ArrayList;
025import java.util.List;
026import java.util.Set;
027import java.util.SortedMap;
028import java.util.TreeMap;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.seq.ComponentFeature;
032import org.biojava.bio.seq.DNATools;
033import org.biojava.bio.seq.StrandedFeature;
034import org.biojava.bio.symbol.AbstractSymbolList;
035import org.biojava.bio.symbol.Alphabet;
036import org.biojava.bio.symbol.IllegalAlphabetException;
037import org.biojava.bio.symbol.Location;
038import org.biojava.bio.symbol.Symbol;
039import org.biojava.bio.symbol.SymbolList;
040
041/**
042 * Support class for applications which need to patch together sections
043 * of sequence into a single SymbolList.  This class isn't intended
044 * for direct use in user code -- instead, it is a helper for people
045 * implementing the full BioJava assembly model.  See SimpleAssembly
046 * for an example.
047 *
048 * @author Thomas Down
049 * @author Greg Cox
050 * @author Matthew Pocock
051 * @since 1.1
052 */
053
054public class AssembledSymbolList extends AbstractSymbolList {
055    private boolean autoLength = true;
056    private int length = 0;
057    private SortedMap components;
058    private List componentList;
059
060    private final Symbol noninformativeSymbol = DNATools.n();
061    private final char noninformativeToken = 'n';
062
063    {
064        components = new TreeMap(Location.naturalOrder);
065        componentList = new ArrayList();
066    }
067
068    public void setLength(int len) {
069        autoLength = false;
070        length = len;
071    }
072
073    public void putComponent(ComponentFeature f) {
074        components.put(f.getLocation(), f);
075        componentList.clear();
076        componentList.addAll(components.keySet());
077    }
078
079    /**
080     * @throws IllegalArgumentException if sl would introduce a circularity
081     *   into the tree of components
082     */
083    public void putComponent(Location l, SymbolList sl) {
084      if(sl == this) {
085        throw new IllegalArgumentException("Circular reference");
086      }
087        components.put(l, sl);
088        componentList.clear();
089        componentList.addAll(components.keySet());
090    }
091
092    public void removeComponent(Location loc) {
093        components.remove(loc);
094        componentList.clear();
095        componentList.addAll(components.keySet());
096    }
097
098    private SymbolList getComponentSymbols(Location loc) {
099        Object o = components.get(loc);
100        if (o instanceof ComponentFeature) {
101            ComponentFeature cf = (ComponentFeature) o;
102            SymbolList sl = cf.getSymbols();
103            if (cf.getStrand() == StrandedFeature.NEGATIVE) {
104                try {
105                    sl = DNATools.reverseComplement(sl);
106                } catch (IllegalAlphabetException ex) {
107                    throw new BioError("Assertion failed: couldn't reverse-complement component symbols");
108                }
109            }
110            return sl;
111        } else {
112            return (SymbolList) o;
113        }
114    }
115
116    public Set getComponentLocationSet() {
117        return components.keySet();
118    }
119
120    /**
121     * Find the location containing p in a sorted list of non-overlapping contiguous
122     * locations.
123     */
124
125    private Location lastLocation = Location.empty;
126
127    private Location locationOfPoint(int p) {
128        if (lastLocation.contains(p)) {
129            return lastLocation;
130        }
131
132        int first = 0;
133        int last = componentList.size() - 1;
134
135        while (first <= last) {
136            int check = (first + last) / 2;
137            Location checkL = (Location) componentList.get(check);
138            if (checkL.contains(p)) {
139                lastLocation = checkL;
140                return checkL;
141            }
142
143            if (p < checkL.getMin()) {
144                last = check - 1;
145            } else {
146                first = check + 1;
147            }
148        }
149
150        return null;
151    }
152
153    private Location locationUpstreamOfPoint(int p) {
154        int first = 0;
155        int last = componentList.size() - 1;
156
157        int check = 0;
158        Location checkL = null;
159        while (first <= last) {
160            check = (first + last) / 2;
161            checkL = (Location) componentList.get(check);
162            if (checkL.contains(p))
163                return checkL;
164
165            if (p < checkL.getMin())
166                last = check - 1;
167            else
168                first = check + 1;
169        }
170
171        try {
172            if (p < checkL.getMin()) {
173                return checkL;
174            } else {
175                return (Location) componentList.get(check + 1);
176            }
177        } catch (IndexOutOfBoundsException ex) {
178            return null;
179        } catch (NullPointerException ex) {
180            return null;
181        }
182    }
183
184    public Alphabet getAlphabet() {
185        return DNATools.getDNA();
186    }
187
188    public int length() {
189      if(autoLength) {
190        int componentCount = componentList.size();
191
192        if (componentCount == 0)
193          // there's nothing in 'ere.
194          return 0;
195        else {
196          Location last = (Location) componentList.get(componentCount - 1);
197          return last.getMax();
198        }
199      } else {
200        return length;
201      }
202    }
203
204  public Symbol symbolAt(int pos) {
205      // System.out.println(this + "  symbolAt(" + pos + ")");
206    Location l = locationOfPoint(pos);
207    if (l != null) {
208      SymbolList syms = getComponentSymbols(l);
209      return syms.symbolAt(pos - l.getMin() + 1);
210    }
211
212    return noninformativeSymbol;
213  }
214
215  public SymbolList subList(int start, int end) {
216      // System.out.println(this + "  subList(" + start + ", " + end + ")");
217    Location l = locationOfPoint(start);
218    if (l != null && l.contains(end)) {
219      SymbolList symbols = getComponentSymbols(l);
220      int tstart = start - l.getMin() + 1;
221      int tend = end - l.getMin() + 1;
222      return symbols.subList(tstart, tend);
223    }
224
225    // All is lost.  Fall back onto `view' subList from AbstractSymbolList
226
227    return super.subList(start, end);
228  }
229
230    public String subStr(int start, int end) {
231        if (start < 1 || end > length()) {
232            throw new IndexOutOfBoundsException("Range out of bounds: " + start + " - " + end);
233        }
234
235        StringBuffer sb = new StringBuffer();
236        int pos = start;
237        while (pos <= end) {
238            Location loc = locationOfPoint(pos);
239            if (loc != null) {
240                SymbolList sl = getComponentSymbols(loc);
241                int slStart = Math.max(1, pos - loc.getMin() + 1);
242                int slEnd = Math.min(loc.getMax() - loc.getMin() + 1,
243                                     end - loc.getMin() + 1);
244                sb.append(sl.subStr(slStart, slEnd));
245                pos += (slEnd - slStart + 1);
246            } else {
247                loc = locationUpstreamOfPoint(pos);
248                int numNs;
249                if (loc != null) {
250                    numNs = Math.min(loc.getMin(), end + 1) - pos;
251                    pos = loc.getMin();
252                } else {
253                    numNs = end - pos + 1;
254                    pos = end + 1;
255                }
256                for (int i = 0; i < numNs; ++i) {
257                    sb.append(noninformativeToken);
258                }
259            }
260        }
261
262        return sb.substring(0);
263    }
264}