001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.impl; 023 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Set; 027import java.util.SortedMap; 028import java.util.TreeMap; 029 030import org.biojava.bio.BioError; 031import org.biojava.bio.seq.ComponentFeature; 032import org.biojava.bio.seq.DNATools; 033import org.biojava.bio.seq.StrandedFeature; 034import org.biojava.bio.symbol.AbstractSymbolList; 035import org.biojava.bio.symbol.Alphabet; 036import org.biojava.bio.symbol.IllegalAlphabetException; 037import org.biojava.bio.symbol.Location; 038import org.biojava.bio.symbol.Symbol; 039import org.biojava.bio.symbol.SymbolList; 040 041/** 042 * Support class for applications which need to patch together sections 043 * of sequence into a single SymbolList. This class isn't intended 044 * for direct use in user code -- instead, it is a helper for people 045 * implementing the full BioJava assembly model. See SimpleAssembly 046 * for an example. 047 * 048 * @author Thomas Down 049 * @author Greg Cox 050 * @author Matthew Pocock 051 * @since 1.1 052 */ 053 054public class AssembledSymbolList extends AbstractSymbolList { 055 private boolean autoLength = true; 056 private int length = 0; 057 private SortedMap components; 058 private List componentList; 059 060 private final Symbol noninformativeSymbol = DNATools.n(); 061 private final char noninformativeToken = 'n'; 062 063 { 064 components = new TreeMap(Location.naturalOrder); 065 componentList = new ArrayList(); 066 } 067 068 public void setLength(int len) { 069 autoLength = false; 070 length = len; 071 } 072 073 public void putComponent(ComponentFeature f) { 074 components.put(f.getLocation(), f); 075 componentList.clear(); 076 componentList.addAll(components.keySet()); 077 } 078 079 /** 080 * @throws IllegalArgumentException if sl would introduce a circularity 081 * into the tree of components 082 */ 083 public void putComponent(Location l, SymbolList sl) { 084 if(sl == this) { 085 throw new IllegalArgumentException("Circular reference"); 086 } 087 components.put(l, sl); 088 componentList.clear(); 089 componentList.addAll(components.keySet()); 090 } 091 092 public void removeComponent(Location loc) { 093 components.remove(loc); 094 componentList.clear(); 095 componentList.addAll(components.keySet()); 096 } 097 098 private SymbolList getComponentSymbols(Location loc) { 099 Object o = components.get(loc); 100 if (o instanceof ComponentFeature) { 101 ComponentFeature cf = (ComponentFeature) o; 102 SymbolList sl = cf.getSymbols(); 103 if (cf.getStrand() == StrandedFeature.NEGATIVE) { 104 try { 105 sl = DNATools.reverseComplement(sl); 106 } catch (IllegalAlphabetException ex) { 107 throw new BioError("Assertion failed: couldn't reverse-complement component symbols"); 108 } 109 } 110 return sl; 111 } else { 112 return (SymbolList) o; 113 } 114 } 115 116 public Set getComponentLocationSet() { 117 return components.keySet(); 118 } 119 120 /** 121 * Find the location containing p in a sorted list of non-overlapping contiguous 122 * locations. 123 */ 124 125 private Location lastLocation = Location.empty; 126 127 private Location locationOfPoint(int p) { 128 if (lastLocation.contains(p)) { 129 return lastLocation; 130 } 131 132 int first = 0; 133 int last = componentList.size() - 1; 134 135 while (first <= last) { 136 int check = (first + last) / 2; 137 Location checkL = (Location) componentList.get(check); 138 if (checkL.contains(p)) { 139 lastLocation = checkL; 140 return checkL; 141 } 142 143 if (p < checkL.getMin()) { 144 last = check - 1; 145 } else { 146 first = check + 1; 147 } 148 } 149 150 return null; 151 } 152 153 private Location locationUpstreamOfPoint(int p) { 154 int first = 0; 155 int last = componentList.size() - 1; 156 157 int check = 0; 158 Location checkL = null; 159 while (first <= last) { 160 check = (first + last) / 2; 161 checkL = (Location) componentList.get(check); 162 if (checkL.contains(p)) 163 return checkL; 164 165 if (p < checkL.getMin()) 166 last = check - 1; 167 else 168 first = check + 1; 169 } 170 171 try { 172 if (p < checkL.getMin()) { 173 return checkL; 174 } else { 175 return (Location) componentList.get(check + 1); 176 } 177 } catch (IndexOutOfBoundsException ex) { 178 return null; 179 } catch (NullPointerException ex) { 180 return null; 181 } 182 } 183 184 public Alphabet getAlphabet() { 185 return DNATools.getDNA(); 186 } 187 188 public int length() { 189 if(autoLength) { 190 int componentCount = componentList.size(); 191 192 if (componentCount == 0) 193 // there's nothing in 'ere. 194 return 0; 195 else { 196 Location last = (Location) componentList.get(componentCount - 1); 197 return last.getMax(); 198 } 199 } else { 200 return length; 201 } 202 } 203 204 public Symbol symbolAt(int pos) { 205 // System.out.println(this + " symbolAt(" + pos + ")"); 206 Location l = locationOfPoint(pos); 207 if (l != null) { 208 SymbolList syms = getComponentSymbols(l); 209 return syms.symbolAt(pos - l.getMin() + 1); 210 } 211 212 return noninformativeSymbol; 213 } 214 215 public SymbolList subList(int start, int end) { 216 // System.out.println(this + " subList(" + start + ", " + end + ")"); 217 Location l = locationOfPoint(start); 218 if (l != null && l.contains(end)) { 219 SymbolList symbols = getComponentSymbols(l); 220 int tstart = start - l.getMin() + 1; 221 int tend = end - l.getMin() + 1; 222 return symbols.subList(tstart, tend); 223 } 224 225 // All is lost. Fall back onto `view' subList from AbstractSymbolList 226 227 return super.subList(start, end); 228 } 229 230 public String subStr(int start, int end) { 231 if (start < 1 || end > length()) { 232 throw new IndexOutOfBoundsException("Range out of bounds: " + start + " - " + end); 233 } 234 235 StringBuffer sb = new StringBuffer(); 236 int pos = start; 237 while (pos <= end) { 238 Location loc = locationOfPoint(pos); 239 if (loc != null) { 240 SymbolList sl = getComponentSymbols(loc); 241 int slStart = Math.max(1, pos - loc.getMin() + 1); 242 int slEnd = Math.min(loc.getMax() - loc.getMin() + 1, 243 end - loc.getMin() + 1); 244 sb.append(sl.subStr(slStart, slEnd)); 245 pos += (slEnd - slStart + 1); 246 } else { 247 loc = locationUpstreamOfPoint(pos); 248 int numNs; 249 if (loc != null) { 250 numNs = Math.min(loc.getMin(), end + 1) - pos; 251 pos = loc.getMin(); 252 } else { 253 numNs = end - pos + 1; 254 pos = end + 1; 255 } 256 for (int i = 0; i < numNs; ++i) { 257 sb.append(noninformativeToken); 258 } 259 } 260 } 261 262 return sb.substring(0); 263 } 264}