001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.chromatogram; 023 024import java.util.ArrayList; 025import java.util.HashMap; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.NoSuchElementException; 030 031import org.biojava.bio.BioError; 032import org.biojava.bio.alignment.Alignment; 033import org.biojava.bio.alignment.SimpleAlignment; 034import org.biojava.bio.seq.DNATools; 035import org.biojava.bio.symbol.Alphabet; 036import org.biojava.bio.symbol.AtomicSymbol; 037import org.biojava.bio.symbol.IllegalAlphabetException; 038import org.biojava.bio.symbol.IllegalSymbolException; 039import org.biojava.bio.symbol.IntegerAlphabet; 040import org.biojava.bio.symbol.SimpleSymbolList; 041import org.biojava.bio.symbol.SymbolList; 042import org.biojava.bio.symbol.SymbolListViews; 043import org.biojava.utils.ChangeListener; 044 045/** 046 * A basic, abstract implementation of {@link Chromatogram}. Provides 047 * protected setters so that subclasses may set the value of the various 048 * properties of a chromatogram. 049 * 050 * Chromatograms should be created using {@link ChromatogramFactory} or a 051 * parser for a particular file format. 052 * 053 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>) 054 * @author Matthew Pocock 055 * @since 1.3 056 * 057 * 058 */ 059public abstract class AbstractChromatogram implements Chromatogram { 060 private static int A = 0; // Array index for 'A'-related material 061 private static int C = 1; // Array index for 'C'-related material 062 private static int G = 2; // Array index for 'G'-related material 063 private static int T = 3; // Array index for 'T'-related material 064 065 /** 2D array containing all the samples for all 4 traces. */ 066 private int[][] traceSample; 067 /** Array containing the single highest value for each trace. */ 068 private int[] maxTraceValue; 069 /** The immutable Alignment that will be returned by getBaseCalls(). */ 070 private Alignment baseCalls; 071 private int significantBits; 072 073 /** 074 * Create a new AbstractChromatogram. 075 */ 076 public AbstractChromatogram() { 077 maxTraceValue = new int[4]; 078 traceSample = new int[4][]; 079 significantBits = 0; 080 } 081 082 public int[] getTrace(AtomicSymbol nucleotide) 083 throws IllegalSymbolException { 084 return traceSample[nucToIndex(nucleotide)]; 085 } 086 087 public int getTraceLength() { 088 return traceSample[0].length; 089 } 090 091 public int getMax() { 092 try { 093 return Math.max( 094 Math.max(getMax(DNATools.a()), getMax(DNATools.c())), 095 Math.max(getMax(DNATools.g()), getMax(DNATools.t())) 096 ); 097 } catch (IllegalSymbolException ise) { 098 throw new BioError("Can't happen", ise); 099 } 100 } 101 102 public int getMax(AtomicSymbol nucleotide) throws IllegalSymbolException { 103 return maxTraceValue[nucToIndex(nucleotide)]; 104 } 105 106 /** 107 * Return the total number of base calls. 108 * 109 * @return the total number of base calls 110 */ 111 public Alignment getBaseCalls() { return baseCalls; } 112 113 /** 114 * Return the sequence length. 115 * 116 * @return the sequence length 117 */ 118 public int getSequenceLength() { return baseCalls.length(); } 119 120 /** 121 * Return the number of significant bits. 122 * 123 * @return the significant bits 124 */ 125 public int getSignificantBits() { return significantBits; } 126 127 /** 128 * Convert a DNA symbol to an internal array index. 129 * 130 * @param nuc the nucleotide to convert 131 * @return an integer giving it's internal index value 132 * @throws IllegalSymbolException if the symbol is not recognised 133 */ 134 private final int nucToIndex(AtomicSymbol nuc) throws IllegalSymbolException { 135 if (nuc == DNATools.a()) return A; 136 else if (nuc == DNATools.c()) return C; 137 else if (nuc == DNATools.g()) return G; 138 else if (nuc == DNATools.t()) return T; 139 else 140 throw new IllegalSymbolException("The symbol " + 141 nuc.getName() + " (" + 142 nuc.getClass().getName() + 143 ") is not in the DNA alphabet"); 144 } 145 146 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 147 * Protected mutators for subclasses to initialize fields 148 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 149 150 /** 151 * Provides the list of base calls. 152 * @param align the base call alignment 153 * @throws NoSuchElementException when align doesn't contain alignments with 154 * the required DNA and OFFSETS labels 155 * @throws IllegalArgumentException the lists in align aren't all the same length. 156 * @throws IllegalAlphabetException if the required lists don't have the 157 * correct alphabets. See the documentation of 158 * {@link Chromatogram#getBaseCalls() } for details. 159 * @see Chromatogram#getBaseCalls 160 */ 161 protected final void setBaseCallAlignment(Alignment align) 162 throws IllegalAlphabetException, IllegalArgumentException, 163 NoSuchElementException { 164 SymbolList dna, offsets; 165 try { 166 dna = align.symbolListForLabel(Chromatogram.DNA); 167 offsets = align.symbolListForLabel(Chromatogram.OFFSETS); 168 } catch (NoSuchElementException nsee) { 169 throw nsee; 170 } 171 if (dna.getAlphabet() != DNATools.getDNA()) 172 throw new IllegalAlphabetException("DNA list has inappropriate alphabet"); 173 if (! (offsets.getAlphabet() instanceof IntegerAlphabet || 174 offsets.getAlphabet() instanceof IntegerAlphabet.SubIntegerAlphabet)) 175 throw new IllegalAlphabetException("Offsets list has inappropriate alphabet"); 176 177 baseCalls = align; 178 } 179 180 /** 181 * Sets the trace data structures to null. If a subclass needs to replace 182 * the traces with new traces of a different length, this method must be 183 * called first to avoid provoking an IllegalArgumentException from setTrace 184 * due to a length mismatch. 185 */ 186 protected final void clearTraces() { 187 maxTraceValue[A] = maxTraceValue[C] = maxTraceValue[G] 188 = maxTraceValue[T] = Integer.MIN_VALUE; 189 traceSample[A] = traceSample[C] = traceSample[G] 190 = traceSample[T] = null; 191 } 192 193 /** 194 * Provides the trace samples for a particular nucleotide. 195 * @param nuc A DNA nucleotide 196 * @param trace the trace samples themselves 197 * @param maxVal the maximum value in the trace array. If this value 198 * is {@link Integer#MIN_VALUE}, this method will do a linear 199 * search of trace to determine the max. 200 * @throws IllegalArgumentException when trace.length is different 201 * from any of the existing (non-null) traces 202 * @throws IllegalSymbolException when nuc is not a concrete DNA nucleotide 203 */ 204 protected final void setTrace(AtomicSymbol nuc, int[] trace, int maxVal) 205 throws IllegalArgumentException, IllegalSymbolException { 206 int idx = nucToIndex(nuc); 207 if (trace == null) { 208 traceSample[idx] = null; 209 maxTraceValue[idx] = Integer.MIN_VALUE; 210 } 211 else { 212 // check for length mismatches 213 for (int i = 0 ; i < 4 ; i++) { 214 if (traceSample[i] != null && traceSample[i].length != trace.length) { 215 throw new IllegalArgumentException( 216 "All traces must be the same length. " + trace.length + 217 " != " + traceSample[i].length); 218 } 219 } 220 // special case to do linear search for max 221 if (maxVal == Integer.MIN_VALUE) { 222 maxVal = trace[0]; 223 for (int i = 1 ; i < trace.length ; i++) 224 maxVal = Math.max(maxVal, trace[i]); 225 } 226 traceSample[idx] = trace; 227 maxTraceValue[idx] = maxVal; 228 } 229 } 230 231 /** Sets the number of significant bits in the trace samples. 232 * @param bits a non-negative integer indicating the number of 233 * significant bits in each trace sample 234 * @throws IllegalArgumentException when <code>bits</code> is negative 235 */ 236 protected final void setBits(int bits) 237 throws IllegalArgumentException { 238 if (bits < 0) 239 throw new IllegalArgumentException("Invalid number of significant bits"); 240 this.significantBits = bits; 241 } 242 243 /** Returns a new instance of this AbstractChromatogram subclass for use in 244 * {@link #reverseComplement}. 245 * 246 * @return a reverse-complemented AbstractChromatogram 247 */ 248 protected abstract AbstractChromatogram reverseComplementInstance(); 249 250 public Chromatogram reverseComplement() { 251 AbstractChromatogram rev = this.reverseComplementInstance(); 252 try { 253 rev.setTrace(DNATools.a(), reverse(traceSample[T]), maxTraceValue[T]); 254 rev.setTrace(DNATools.c(), reverse(traceSample[G]), maxTraceValue[G]); 255 rev.setTrace(DNATools.g(), reverse(traceSample[C]), maxTraceValue[C]); 256 rev.setTrace(DNATools.t(), reverse(traceSample[A]), maxTraceValue[A]); 257 } catch (IllegalSymbolException ise) { 258 throw new BioError( "Can't happen -- all symbols are explicit and legal", ise); 259 } 260 Alignment revBC = reverseComplementBaseCalls(); 261 try { 262 rev.setBaseCallAlignment(revBC); 263 } catch (IllegalAlphabetException iae) { 264 throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", iae); 265 } catch (NoSuchElementException nsee) { 266 throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", nsee); 267 } catch (IllegalArgumentException iae) { 268 throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", iae); 269 } 270 rev.setBits(this.significantBits); 271 return rev; 272 } 273 274 /** 275 * Returns a new base call alignment that is the reverse complement of 276 * one in this chromatogram. This is achieved by calling 277 * {@link #reverseComplementBaseCallList} for each label in the current 278 * base call alignment. When that method returns null, no list will 279 * appear in reverse complement base call alignment with the null-provoking 280 * label. For this reason, subclasses are encouraged to override 281 * {@link #reverseComplementBaseCallList} to handle any additional per-base 282 * metadata that they store. 283 * <p> 284 * This implementation should be safely inheritable 285 * for all chromatogram implementations, unless one just doesn't want 286 * base calls on its reverse complement output. If this is the case, 287 * it should override this method to return null. 288 * </p> 289 * 290 * @return a new {@link Alignment} that is the reverse complement of the 291 * one in the current chromatogram 292 */ 293 protected Alignment reverseComplementBaseCalls() { 294 Alignment current = this.getBaseCalls(); 295 Map revBCMap = new HashMap(); 296 Object curLabel; 297 SymbolList revList; 298 for (Iterator it = current.getLabels().iterator() ; it.hasNext() ;) { 299 curLabel = it.next(); 300 revList = reverseComplementBaseCallList(curLabel); 301 if (revList != null) 302 revBCMap.put(curLabel, revList); 303 } 304 return createImmutableAlignment(revBCMap); 305 } 306 307 /** 308 * Return a symbol list containing the reverse complement of the base call 309 * data for the given label. The returned list will be stored in the 310 * reverse complement's base call alignment under the same label. 311 * <p> 312 * Implementation note: subclasses which do not use an {@link IntegerAlphabet} for 313 * their offsets lists must override this method, at least for the case where 314 * <code>label == {@link #OFFSETS}</code>. 315 * </p> 316 * 317 * @param label the label Object 318 * @return an appropriately reverse-complemented SymbolList, or null if the 319 * label is unhandled. 320 */ 321 protected SymbolList reverseComplementBaseCallList(Object label) { 322 if (label == DNA) { 323 try { 324 return SymbolListViews.reverse(DNATools.complement(this.getBaseCalls().symbolListForLabel(DNA))); 325 } catch (IllegalAlphabetException iae) { 326 throw new BioError("Can't happen unless the DNA list has been set improperly", iae); 327 } 328 } 329 else if (label == OFFSETS) { 330 SymbolList curOffsets = this.getBaseCalls().symbolListForLabel(OFFSETS); 331 List revOffsets = new ArrayList(curOffsets.length()); 332 IntegerAlphabet alpha = (IntegerAlphabet) curOffsets.getAlphabet(); 333 IntegerAlphabet.IntegerSymbol sym; 334 for (int i = curOffsets.length() ; i > 0 ; i--) { 335 sym = (IntegerAlphabet.IntegerSymbol) curOffsets.symbolAt(i); 336 revOffsets.add(alpha.getSymbol(this.getTraceLength() - sym.intValue())); 337 } 338 try { 339 return createImmutableSymbolList(alpha, revOffsets); 340 } catch (IllegalSymbolException ise) { 341 throw new BioError("Can't happen -- revOffsets was just created with only symbols from alpha"); 342 } 343 } 344 else { 345 return null; 346 } 347 } 348 349 /** 350 * A factory method for creating new symbol lists with a given alphabet. 351 * The default implementation should be fine for nearly all cases, but the 352 * option is given in case a developer wishes to use a more memory 353 * efficient implementation. 354 * 355 * @param alpha the {@link Alphabet} for the new list 356 * @param syms the symbols to put in the new list 357 * @throws IllegalSymbolException when alpha and syms are incompatible 358 * @throws ClassCastException when any object in syms isn't a Symbol 359 * @return a new immutable SymbolList containing all the given symbols using 360 * the given Alphabet 361 */ 362 protected SymbolList createImmutableSymbolList(Alphabet alpha, List syms) 363 throws IllegalSymbolException, ClassCastException { 364 SymbolList symlist = new SimpleSymbolList(alpha, syms); 365 symlist.addChangeListener(ChangeListener.ALWAYS_VETO); 366 return symlist; 367 } 368 369 /** 370 * A factory method for creating new immutable alignments, particularly 371 * for use as base call alignments. The default implementation should 372 * be fine for nearly all cases. 373 * 374 * @param labelsToSymLists a {@link Map} whose keys are desired labels 375 * for the alignment and whose values are the SymbolLists. 376 * All the SymbolLists must be the same length. 377 * @return a new Alignment 378 * @throws IllegalArgumentException if the lists aren't all the same length 379 * @throws ClassCastException if any of the values in the map aren't 380 * SymbolLists 381 */ 382 protected Alignment createImmutableAlignment(Map labelsToSymLists) 383 throws IllegalArgumentException, ClassCastException { 384 Alignment sa = new SimpleAlignment(labelsToSymLists); 385 sa.addChangeListener(ChangeListener.ALWAYS_VETO); 386 return sa; 387 } 388 389 /** 390 * Utility method for reversing an int[] array. Visible for subclass use. 391 * 392 * @param src the source array 393 * @return an array of the same length containing all values in src in 394 * reverse order 395 */ 396 protected final static int[] reverse(int[] src) { 397 int[] dst = new int[src.length]; 398 for (int i = 0 ; i < src.length ; i++) 399 dst[src.length - i - 1] = src[i]; 400 return dst; 401 } 402}