001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.chromatogram;
023
024import java.util.ArrayList;
025import java.util.HashMap;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.NoSuchElementException;
030
031import org.biojava.bio.BioError;
032import org.biojava.bio.alignment.Alignment;
033import org.biojava.bio.alignment.SimpleAlignment;
034import org.biojava.bio.seq.DNATools;
035import org.biojava.bio.symbol.Alphabet;
036import org.biojava.bio.symbol.AtomicSymbol;
037import org.biojava.bio.symbol.IllegalAlphabetException;
038import org.biojava.bio.symbol.IllegalSymbolException;
039import org.biojava.bio.symbol.IntegerAlphabet;
040import org.biojava.bio.symbol.SimpleSymbolList;
041import org.biojava.bio.symbol.SymbolList;
042import org.biojava.bio.symbol.SymbolListViews;
043import org.biojava.utils.ChangeListener;
044
045/**
046 * A basic, abstract implementation of {@link Chromatogram}.  Provides
047 * protected setters so that subclasses may set the value of the various
048 * properties of a chromatogram.
049 *
050 * Chromatograms should be created using {@link ChromatogramFactory} or a
051 * parser for a particular file format.
052 * 
053 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>)
054 * @author Matthew Pocock
055 * @since 1.3
056 *
057 * 
058 */
059public abstract class AbstractChromatogram implements Chromatogram {
060    private static int A = 0; // Array index for 'A'-related material
061    private static int C = 1; // Array index for 'C'-related material
062    private static int G = 2; // Array index for 'G'-related material
063    private static int T = 3; // Array index for 'T'-related material
064
065    /** 2D array containing all the samples for all 4 traces. */
066    private int[][] traceSample;
067    /** Array containing the single highest value for each trace. */
068    private int[] maxTraceValue;
069    /** The immutable Alignment that will be returned by getBaseCalls(). */
070    private Alignment baseCalls;
071    private int significantBits;
072
073  /**
074   * Create a new AbstractChromatogram.
075   */
076    public AbstractChromatogram() {
077        maxTraceValue = new int[4];
078        traceSample = new int[4][];
079        significantBits = 0;
080    }
081
082    public int[] getTrace(AtomicSymbol nucleotide)
083    throws IllegalSymbolException {
084        return traceSample[nucToIndex(nucleotide)];
085    }
086
087    public int getTraceLength() {
088        return traceSample[0].length;
089    }
090
091    public int getMax() {
092        try {
093            return Math.max(
094                    Math.max(getMax(DNATools.a()), getMax(DNATools.c())),
095                    Math.max(getMax(DNATools.g()), getMax(DNATools.t()))
096                   );
097        } catch (IllegalSymbolException ise) {
098            throw new BioError("Can't happen", ise);
099        }
100    }
101
102    public int getMax(AtomicSymbol nucleotide) throws IllegalSymbolException {
103        return maxTraceValue[nucToIndex(nucleotide)];
104    }
105
106  /**
107   * Return the total number of base calls.
108   *
109   * @return the total number of base calls
110   */
111    public Alignment getBaseCalls()  { return baseCalls; }
112
113  /**
114   * Return the sequence length.
115   *
116   * @return the sequence length
117   */
118    public int getSequenceLength()   { return baseCalls.length(); }
119
120  /**
121   * Return the number of significant bits.
122   *
123   * @return the significant bits
124   */
125    public int getSignificantBits()  { return significantBits; }
126
127    /**
128     * Convert a DNA symbol to an internal array index.
129     *
130     * @param nuc  the nucleotide to convert
131     * @return an integer giving it's internal index value
132     * @throws IllegalSymbolException if the symbol is not recognised
133     */
134    private final int nucToIndex(AtomicSymbol nuc) throws IllegalSymbolException {
135        if      (nuc == DNATools.a()) return A;
136        else if (nuc == DNATools.c()) return C;
137        else if (nuc == DNATools.g()) return G;
138        else if (nuc == DNATools.t()) return T;
139        else
140            throw new IllegalSymbolException("The symbol " +
141                nuc.getName() + " (" +
142                nuc.getClass().getName() +
143                ") is not in the DNA alphabet");
144    }
145
146    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
147     * Protected mutators for subclasses to initialize fields
148     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
149
150    /**
151     * Provides the list of base calls.
152     * @param align the base call alignment
153     * @throws NoSuchElementException when align doesn't contain alignments with
154     *         the required DNA and OFFSETS labels
155     * @throws IllegalArgumentException the lists in align aren't all the same length.
156     * @throws IllegalAlphabetException if the required lists don't have the
157     *         correct alphabets.  See the documentation of
158     *         {@link Chromatogram#getBaseCalls() } for details.
159     * @see Chromatogram#getBaseCalls
160     */
161    protected final void setBaseCallAlignment(Alignment align)
162    throws IllegalAlphabetException, IllegalArgumentException,
163           NoSuchElementException {
164        SymbolList dna, offsets;
165        try {
166            dna = align.symbolListForLabel(Chromatogram.DNA);
167            offsets = align.symbolListForLabel(Chromatogram.OFFSETS);
168        } catch (NoSuchElementException nsee) {
169            throw nsee;
170        }
171        if (dna.getAlphabet() != DNATools.getDNA())
172            throw new IllegalAlphabetException("DNA list has inappropriate alphabet");
173        if (! (offsets.getAlphabet() instanceof IntegerAlphabet ||
174               offsets.getAlphabet() instanceof IntegerAlphabet.SubIntegerAlphabet))
175            throw new IllegalAlphabetException("Offsets list has inappropriate alphabet");
176
177        baseCalls = align;
178    }
179
180    /**
181     * Sets the trace data structures to null.  If a subclass needs to replace
182     * the traces with new traces of a different length, this method must be
183     * called first to avoid provoking an IllegalArgumentException from setTrace
184     * due to a length mismatch.
185     */
186    protected final void clearTraces() {
187        maxTraceValue[A] = maxTraceValue[C] = maxTraceValue[G]
188                         = maxTraceValue[T] = Integer.MIN_VALUE;
189        traceSample[A] = traceSample[C] = traceSample[G]
190                       = traceSample[T] = null;
191    }
192
193    /**
194     * Provides the trace samples for a particular nucleotide.
195     * @param nuc A DNA nucleotide
196     * @param trace the trace samples themselves
197     * @param maxVal the maximum value in the trace array.  If this value
198     *        is {@link Integer#MIN_VALUE}, this method will do a linear
199     *        search of trace to determine the max.
200     * @throws IllegalArgumentException when trace.length is different
201     *         from any of the existing (non-null) traces
202     * @throws IllegalSymbolException when nuc is not a concrete DNA nucleotide
203     */
204    protected final void setTrace(AtomicSymbol nuc, int[] trace, int maxVal)
205    throws IllegalArgumentException, IllegalSymbolException {
206        int idx = nucToIndex(nuc);
207        if (trace == null) {
208            traceSample[idx] = null;
209            maxTraceValue[idx] = Integer.MIN_VALUE;
210        }
211        else {
212            // check for length mismatches
213            for (int i = 0 ; i < 4 ; i++) {
214                if (traceSample[i] != null && traceSample[i].length != trace.length) {
215                    throw new IllegalArgumentException(
216                        "All traces must be the same length.  " + trace.length  +
217                        " != " + traceSample[i].length);
218                }
219            }
220            // special case to do linear search for max
221            if (maxVal == Integer.MIN_VALUE) {
222                maxVal = trace[0];
223                for (int i = 1 ; i < trace.length ; i++)
224                    maxVal = Math.max(maxVal, trace[i]);
225            }
226            traceSample[idx] = trace;
227            maxTraceValue[idx] = maxVal;
228        }
229    }
230
231    /** Sets the number of significant bits in the trace samples.
232     *  @param bits a non-negative integer indicating the number of
233     *         significant bits in each trace sample
234     *  @throws IllegalArgumentException when <code>bits</code> is negative
235     */
236    protected final void setBits(int bits)
237    throws IllegalArgumentException {
238        if (bits < 0)
239            throw new IllegalArgumentException("Invalid number of significant bits");
240        this.significantBits = bits;
241    }
242
243    /** Returns a new instance of this AbstractChromatogram subclass for use in
244     * {@link #reverseComplement}.
245     *
246     * @return a reverse-complemented AbstractChromatogram
247     */
248    protected abstract AbstractChromatogram reverseComplementInstance();
249
250    public Chromatogram reverseComplement() {
251        AbstractChromatogram rev = this.reverseComplementInstance();
252        try {
253            rev.setTrace(DNATools.a(), reverse(traceSample[T]), maxTraceValue[T]);
254            rev.setTrace(DNATools.c(), reverse(traceSample[G]), maxTraceValue[G]);
255            rev.setTrace(DNATools.g(), reverse(traceSample[C]), maxTraceValue[C]);
256            rev.setTrace(DNATools.t(), reverse(traceSample[A]), maxTraceValue[A]);
257        } catch (IllegalSymbolException ise) {
258            throw new BioError( "Can't happen -- all symbols are explicit and legal", ise);
259        }
260        Alignment revBC = reverseComplementBaseCalls();
261        try {
262            rev.setBaseCallAlignment(revBC);
263        } catch (IllegalAlphabetException iae) {
264            throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", iae);
265        } catch (NoSuchElementException nsee) {
266            throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", nsee);
267        } catch (IllegalArgumentException iae) {
268            throw new BioError("Can't happen unless reverseComplementBaseCalls or reverseComplementBaseCallList have been overridden out-of-spec", iae);
269        }
270        rev.setBits(this.significantBits);
271        return rev;
272    }
273
274    /**
275     * Returns a new base call alignment that is the reverse complement of
276     * one in this chromatogram.  This is achieved by calling
277     * {@link #reverseComplementBaseCallList} for each label in the current
278     * base call alignment.  When that method returns null, no list will
279     * appear in reverse complement base call alignment with the null-provoking
280     * label.  For this reason, subclasses are encouraged to override
281     * {@link #reverseComplementBaseCallList} to handle any additional per-base
282     * metadata that they store.
283     * <p>
284     * This implementation should be safely inheritable
285     * for all chromatogram implementations, unless one just doesn't want
286     * base calls on its reverse complement output.  If this is the case,
287     * it should override this method to return null.
288     * </p>
289     *
290     * @return a new {@link Alignment} that is the reverse complement of the
291     *          one in the current chromatogram
292     */
293    protected Alignment reverseComplementBaseCalls() {
294        Alignment current = this.getBaseCalls();
295        Map revBCMap = new HashMap();
296        Object curLabel;
297        SymbolList revList;
298        for (Iterator it = current.getLabels().iterator() ; it.hasNext() ;) {
299            curLabel = it.next();
300            revList = reverseComplementBaseCallList(curLabel);
301            if (revList != null)
302                revBCMap.put(curLabel, revList);
303        }
304        return createImmutableAlignment(revBCMap);
305    }
306
307    /**
308     * Return a symbol list containing the reverse complement of the base call
309     * data for the given label.  The returned list will be stored in the
310     * reverse complement's base call alignment under the same label.
311     * <p>
312     * Implementation note: subclasses which do not use an {@link IntegerAlphabet} for
313     * their offsets lists must override this method, at least for the case where
314     * <code>label == {@link #OFFSETS}</code>.
315     * </p>
316     *
317     * @param label the label Object
318     * @return an appropriately reverse-complemented SymbolList, or null if the
319     *          label is unhandled.
320     */
321    protected SymbolList reverseComplementBaseCallList(Object label) {
322        if (label == DNA) {
323            try {
324                return SymbolListViews.reverse(DNATools.complement(this.getBaseCalls().symbolListForLabel(DNA)));
325            } catch (IllegalAlphabetException iae) {
326                throw new BioError("Can't happen unless the DNA list has been set improperly", iae);
327            }
328        }
329        else if (label == OFFSETS) {
330            SymbolList curOffsets = this.getBaseCalls().symbolListForLabel(OFFSETS);
331            List revOffsets = new ArrayList(curOffsets.length());
332            IntegerAlphabet alpha = (IntegerAlphabet) curOffsets.getAlphabet();
333            IntegerAlphabet.IntegerSymbol sym;
334            for (int i = curOffsets.length() ; i > 0 ; i--) {
335                sym = (IntegerAlphabet.IntegerSymbol) curOffsets.symbolAt(i);
336                revOffsets.add(alpha.getSymbol(this.getTraceLength() - sym.intValue()));
337            }
338            try {
339                return createImmutableSymbolList(alpha, revOffsets);
340            } catch (IllegalSymbolException ise) {
341                throw new BioError("Can't happen -- revOffsets was just created with only symbols from alpha");
342            }
343        }
344        else {
345            return null;
346        }
347    }
348
349    /**
350     * A factory method for creating new symbol lists with a given alphabet.
351     * The default implementation should be fine for nearly all cases, but the
352     * option is given in case a developer wishes to use a more memory
353     * efficient implementation.
354     *
355     * @param alpha the {@link Alphabet} for the new list
356     * @param syms the symbols to put in the new list
357     * @throws IllegalSymbolException when alpha and syms are incompatible
358     * @throws ClassCastException when any object in syms isn't a Symbol
359     * @return a new immutable SymbolList containing all the given symbols using
360     *          the given Alphabet
361     */
362    protected SymbolList createImmutableSymbolList(Alphabet alpha, List syms)
363    throws IllegalSymbolException, ClassCastException {
364        SymbolList symlist = new SimpleSymbolList(alpha, syms);
365        symlist.addChangeListener(ChangeListener.ALWAYS_VETO);
366        return symlist;
367    }
368
369    /**
370     * A factory method for creating new immutable alignments, particularly
371     * for use as base call alignments.  The default implementation should
372     * be fine for nearly all cases.
373     *
374     * @param labelsToSymLists a {@link Map} whose keys are desired labels
375     *        for the alignment and whose values are the SymbolLists.
376     *        All the SymbolLists must be the same length.
377     * @return a new Alignment
378     * @throws IllegalArgumentException if the lists aren't all the same length
379     * @throws ClassCastException if any of the values in the map aren't
380     *         SymbolLists
381     */
382    protected Alignment createImmutableAlignment(Map labelsToSymLists)
383    throws IllegalArgumentException, ClassCastException {
384        Alignment sa = new SimpleAlignment(labelsToSymLists);
385        sa.addChangeListener(ChangeListener.ALWAYS_VETO);
386        return sa;
387    }
388
389    /**
390     *  Utility method for reversing an int[] array. Visible for subclass use.
391     *
392     * @param src  the source array
393     * @return an array of the same length containing all values in src in
394     *     reverse order
395     */
396    protected final static int[] reverse(int[] src) {
397        int[] dst = new int[src.length];
398        for (int i = 0 ; i < src.length ; i++)
399            dst[src.length - i - 1] = src[i];
400        return dst;
401    }
402}