001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022/** 023 * <p>AbstractULAlignment is an abstract base class for alignments 024 * where the constituent sequences have unequal lengths.</p> 025 * 026 * @author David Waring 027 * @author Mark Schreiber (docs and some minor changes) 028 */ 029 030package org.biojava.bio.alignment; 031 032import java.io.Serializable; 033import java.util.AbstractList; 034import java.util.ArrayList; 035import java.util.Comparator; 036import java.util.Iterator; 037import java.util.List; 038import java.util.NoSuchElementException; 039import java.util.Set; 040import java.util.SortedSet; 041import java.util.TreeSet; 042import java.util.Vector; 043 044import org.biojava.bio.BioError; 045import org.biojava.bio.symbol.AbstractSymbolList; 046import org.biojava.bio.symbol.Alphabet; 047import org.biojava.bio.symbol.IllegalSymbolException; 048import org.biojava.bio.symbol.Location; 049import org.biojava.bio.symbol.LocationTools; 050import org.biojava.bio.symbol.RangeLocation; 051import org.biojava.bio.symbol.Symbol; 052import org.biojava.bio.symbol.SymbolList; 053 054public abstract class AbstractULAlignment extends AbstractSymbolList implements 055 UnequalLengthAlignment { 056 057 protected Alphabet alphabet; 058 059 /** 060 * this will return the ambiguity symbol associated with all symbols in that 061 * column 062 */ 063 064 public Symbol symbolAt(int index) { 065 try { 066 return alphabet.getSymbol(new ColAsList<Symbol>(index)); 067 } catch (IllegalSymbolException ire) { 068 throw new BioError( 069 "Somehow my crossproduct alphabet is incompatible with column " 070 + index, ire); 071 } 072 } 073 074 public List<String> labelsAt(int column) { 075 return labelsInRange(new RangeLocation(column, column)); 076 } 077 078 public List<String> labelsInRange(Location loc) { 079 List<String> labels = getLabels(); 080 Location seqLoc; 081 String label; 082 List<String> labelList = new Vector<String>(); 083 084 for (Iterator<String> labelIterator = labels.iterator(); labelIterator 085 .hasNext();) { 086 label = labelIterator.next(); 087 seqLoc = locInAlignment(label); 088 if (LocationTools.overlaps(loc, seqLoc)) { 089 labelList.add(label); 090 } 091 } 092 return labelList; 093 } 094 095 public Iterator<SymbolList> symbolListIterator() { 096 return new Alignment.SymbolListIterator(this); 097 } 098 099 protected void debug(String s) { 100 System.out.println(s); 101 } 102 103 /** 104 * leftMost and rightMost return labels. If there are more than one that 105 * start at the same location it returns the longest, if they are the same 106 * length it returns the first one it found; 107 */ 108 public Object leftMost() { 109 List<String> labels = getLabels(); 110 Object leftMost = null; 111 Object label; 112 Location leftLoc = null; 113 Location loc = null; 114 115 for (Iterator<String> labelIterator = labels.iterator(); labelIterator 116 .hasNext();) { 117 label = labelIterator.next(); 118 loc = locInAlignment(label); 119 if (leftMost == null) { 120 leftMost = label; 121 leftLoc = loc; 122 // loc = loc; 123 } 124 if (loc.getMin() < leftLoc.getMin()) { 125 leftMost = label; 126 leftLoc = loc; 127 } else if (loc.getMin() == leftLoc.getMin()) { 128 if ((loc.getMax() - loc.getMin()) > (leftLoc.getMax() - leftLoc 129 .getMin())) { 130 leftMost = label; 131 leftLoc = loc; 132 } 133 } 134 } 135 return leftMost(); 136 } 137 138 public Object rightMost() { 139 List<String> labels = getLabels(); 140 Object rightMost = null; 141 Object label; 142 Location rightLoc = null; 143 Location loc = null; 144 145 for (Iterator<String> labelIterator = labels.iterator(); labelIterator 146 .hasNext();) { 147 label = labelIterator.next(); 148 loc = locInAlignment(label); 149 if (rightMost == null) { 150 rightMost = label; 151 rightLoc = loc; 152 // loc= loc; 153 } 154 if (loc.getMin() > rightLoc.getMin()) { 155 rightMost = label; 156 rightLoc = loc; 157 } else if (loc.getMin() == rightLoc.getMin()) { 158 if ((loc.getMax() - loc.getMin()) > (rightLoc.getMax() - rightLoc 159 .getMin())) { 160 rightMost = label; 161 rightLoc = loc; 162 } 163 } 164 } 165 return rightMost(); 166 } 167 168 /** 169 * Retrieves a subalignment specified by the location. 170 * <p> 171 * <b>WARNING:</b> It is assumed that the location is contiguous. If the 172 * location is non-contiguous it may be preferable to use a block iterator 173 * to retrieve each sub location independently. 174 * 175 * @see #subAlignment(Set labels, int min, int max) 176 */ 177 public Alignment subAlignment(Set<String> labels, Location loc) 178 throws IndexOutOfBoundsException { 179 return new SubULAlignment(labels, loc); 180 } 181 182 /** 183 * Retreives a subAlignment 184 * 185 * @param labels 186 * the labels of the <code>SymbolLists</code> to be in the 187 * Alignment 188 * @param min 189 * the left most coordinate 190 * @param max 191 * the right most coordinate 192 * @return an Alignment 193 * @throws NoSuchElementException 194 * if one of the values in <code>labels</code> is not in the 195 * parent alignment 196 */ 197 public Alignment subAlignment(Set<String> labels, int min, int max) 198 throws NoSuchElementException { 199 return subAlignment(labels, LocationTools.makeLocation(min, max)); 200 } 201 202 /** 203 * 204 * @param comp 205 * @return 206 */ 207 public SortedSet<String> orderedLables(Comparator<String> comp) { 208 TreeSet<String> orderedSet = new TreeSet<String>(comp); 209 orderedSet.addAll(getLabels()); 210 return orderedSet; 211 } 212 213 // //////////////////////////////////////////// 214 // //////////////////////////////////////////// 215 // INNER CLASSES 216 // //////////////////////////////////////////// 217 // //////////////////////////////////////////// 218 219 private final class ColAsList<T extends Symbol> extends AbstractList<T> 220 implements Serializable { 221 /** 222 * Generated serial version identifier 223 */ 224 private static final long serialVersionUID = 4923954639110962683L; 225 private final int col; 226 private List<String> labels; 227 228 /** 229 * 230 * @param col 231 */ 232 public ColAsList(int col) { 233 this.col = col; 234 labels = getLabels(); 235 } 236 237 // protected ColAsList() { 238 // this.col = 0; 239 // } 240 241 @SuppressWarnings("unchecked") 242 public T get(int indx) { 243 return (T) symbolAt(labels.get(indx), col); 244 } 245 246 /* 247 * (non-Javadoc) 248 * @see java.util.AbstractCollection#size() 249 */ 250 public int size() { 251 return labels.size(); 252 } 253 } 254 255 /** 256 * Orders by location left to right. If they both start at the same location 257 * (o1.getMin() == o2.getMin()) it the larger of the two considered to the 258 * Left 259 */ 260 261 public class LeftRightLocationComparator<T> implements Comparator<T> { 262 public int compare(Object o1, Object o2) { 263 int ret = 1; 264 Location loc1, loc2; 265 loc1 = locInAlignment(o1); 266 loc2 = locInAlignment(o2); 267 if (loc1.getMin() > loc2.getMin()) { 268 ret = 1; 269 } else if (loc1.getMin() < loc2.getMin()) { 270 ret = -1; 271 } else if (loc1.getMin() == loc2.getMin()) { 272 int s1 = (loc1.getMax() - loc1.getMin()) + 1; 273 int s2 = (loc2.getMax() - loc2.getMin()) + 1; 274 if (s1 == s2) { 275 ret = 1; 276 } else { 277 ret = s2 - s1; 278 } 279 } 280 // debug (" result " + ret); 281 return ret; 282 } 283 } 284 285 // ///////////// 286 // This inner class should take care of all subAlignments 287 // ///////////// 288 289 public class SubULAlignment extends AbstractSymbolList implements 290 UnequalLengthAlignment { 291 private int start, end; 292 private List<String> subLabels; // will be left null if constructed with null 293 294 // Set of labels 295 296 // this allows labels added to underlying Alignment to be seen 297 // unless it was constructed with a Set of labels NOT 298 299 protected SubULAlignment(Set<String> labels, Location loc) 300 throws IndexOutOfBoundsException { 301 this.start = loc.getMin(); 302 this.end = loc.getMax(); 303 if (start < 1 || end > AbstractULAlignment.this.length()) { 304 throw new IndexOutOfBoundsException(); 305 } 306 if (labels != null) { 307 subLabels = new ArrayList<String>(); 308 subLabels.addAll(labels); 309 } else { 310 subLabels = AbstractULAlignment.this 311 .labelsInRange(new RangeLocation(start, end)); 312 } 313 314 } 315 316 /** 317 * realPosition is the position in the underlying Alignment 318 * corresponding to a position in the subAlignment 319 */ 320 321 private int realPosition(int pos) { 322 return pos + start - 1; 323 } 324 325 // /////////////////////// 326 // methods from Interface UnequalLengthAlignment 327 // ////////////////////// 328 public int length() { 329 return end - start + 1; 330 } 331 332 /** 333 * The location of an individual SymbolList relative to overall 334 * Alignment 335 */ 336 public Location locInAlignment(Object label) { 337 Location origLoc = AbstractULAlignment.this.locInAlignment(label); 338 int min = origLoc.getMin() - start + 1; 339 int max = origLoc.getMax() - start + 1; 340 return new RangeLocation(min, max); 341 } 342 343 public Alignment subAlignment(Set<String> labels, Location loc) 344 throws NoSuchElementException { 345 int min = realPosition(loc.getMin()); 346 int max = realPosition(loc.getMax()); 347 // if the Subalignment has a limited subLabel we want to keep labels 348 // of the sub sub aligment limited to that list too 349 350 if (labels == null) { 351 labels = new TreeSet<String>(subLabels); 352 } else { 353 Vector<String> l = new Vector<String>(labels); 354 labels = new TreeSet<String>(listIntersection(l, subLabels)); 355 } 356 // debug (" labels now " + labels); 357 return new SubULAlignment(labels, new RangeLocation(min, max)); 358 } 359 360 protected List<String> listIntersection(List<String> s1, List<String> s2) { 361 List<String> common = new Vector<String>(s1); 362 363 for (Iterator<String> i = s1.iterator(); i.hasNext();) { 364 Object label = i.next(); 365 if (!s2.contains(label)) { 366 common.remove(label); 367 } 368 } 369 return common; 370 } 371 372 public List<String> labelsAt(int column) throws IndexOutOfBoundsException { 373 return labelsInRange(new RangeLocation(column, column)); 374 } 375 376 public List<String> labelsInRange(Location loc) 377 throws IndexOutOfBoundsException { 378 // debug ("looking for labelsInRange " + loc.getMin() + "-" + 379 // loc.getMax()); 380 int min = realPosition(loc.getMin()); 381 int max = realPosition(loc.getMax()); 382 if (min < start || max > end) { 383 throw new IndexOutOfBoundsException(); 384 } 385 return listIntersection(subLabels, AbstractULAlignment.this 386 .labelsInRange(new RangeLocation(min, max))); 387 // List aLabels = AbstractULAlignment.this.labelsInRange(new 388 // RangeLocation(min,max)); 389 // List sLabels = AbstractULAlignment.this.labelsInRange(new 390 // RangeLocation(min,max)); 391 // for (Iterator i = aLabels.iterator();i.hasNext();){ 392 // Object label = i.next(); 393 // if ( ! subLabels.contains(label)){ 394 // sLabels.remove(label); 395 // } 396 // } 397 // return sLabels; 398 399 } 400 401 // /////////////////////// 402 // methods from Interface Alignment 403 // ////////////////////// 404 public List<String> getLabels() { 405 return subLabels; 406 } 407 408 /* 409 * (non-Javadoc) 410 * @see org.biojava.bio.alignment.Alignment#symbolAt(java.lang.String, int) 411 */ 412 public Symbol symbolAt(String label, int column) 413 throws NoSuchElementException { 414 return AbstractULAlignment.this.symbolAt(label, 415 realPosition(column)); 416 } 417 418 /* 419 * (non-Javadoc) 420 * @see org.biojava.bio.symbol.SymbolList#symbolAt(int) 421 */ 422 public Symbol symbolAt(int column) throws NoSuchElementException { 423 return AbstractULAlignment.this.symbolAt(realPosition(column)); 424 } 425 426 /* 427 * (non-Javadoc) 428 * @see org.biojava.bio.alignment.Alignment#symbolListForLabel(java.lang.String) 429 */ 430 public SymbolList symbolListForLabel(String label) 431 throws NoSuchElementException { 432 return AbstractULAlignment.this.symbolListForLabel(label); 433 } 434 435 /* 436 * (non-Javadoc) 437 * @see org.biojava.bio.symbol.SymbolList#getAlphabet() 438 */ 439 public Alphabet getAlphabet() { 440 return AbstractULAlignment.this.getAlphabet(); 441 } 442 443 /* 444 * (non-Javadoc) 445 * @see org.biojava.bio.alignment.Alignment#symbolListIterator() 446 */ 447 public Iterator<SymbolList> symbolListIterator() { 448 return new Alignment.SymbolListIterator(this); 449 } 450 } 451}