001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.chromatogram;
023
024import org.biojava.bio.alignment.Alignment;
025import org.biojava.bio.symbol.AtomicSymbol;
026import org.biojava.bio.symbol.IllegalSymbolException;
027
028/** 
029 * Encapsulates the basic information you would want from a chromatogram.
030 * Read-only.
031 *
032 *  @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>)
033 * @author Matthew Pocock
034 * @since 1.3
035 */
036public interface Chromatogram {
037    /**
038     * The sequence label for the list of called bases.
039     */
040    public static String DNA = "dna";
041
042    /**
043     * The sequence label for the trace offsets of the called bases.
044     */
045    public static String OFFSETS = "trace-offsets";
046    
047    /** Gets the max intensity from all the traces.  Must be equivalent
048     *  to the max of calling {@link #getMax(AtomicSymbol)} on each
049     *  of the four non-ambiguous DNA nucleotides.
050     *  @return the max intensity 
051     */
052    public int getMax();
053    /** Gets the max intensity on the trace for the specified nucleotide.
054     *  @param nucleotide the trace to examine.  Must be a concrete 
055     *         (non-ambiguous) nucleotide from the DNA alphabet
056     *  @throws IllegalSymbolException when the nucleotide isn't from the DNA
057     *          alphabet
058     *  @return the max intensity
059     */
060    public int getMax(AtomicSymbol nucleotide) throws IllegalSymbolException;
061    
062    /** Returns the length of the trace of the Chromatogram.
063     *  @return the number of samples in the trace
064     *  @see #getTrace(AtomicSymbol)
065     */
066    public int getTraceLength();
067    /** Returns an array containing the intensities of the sampled waveform
068     *  representing the chromatogram trace for base <code>nucleotide</code>.  
069     *  This may be a reference the actual internal representation of the 
070     *  samples, so callers <b>must not modify it</b>.
071     *  <p>
072     *  The resulting array for each nucleotide must be {@link #getTraceLength}
073     *  <code>int</code>s long.
074     *  </p>
075     *  @param nucleotide the trace to examine.  Must be the symbol for A, C, G, or T
076     *         as provided by {@link org.biojava.bio.seq.DNATools}
077     *  @throws IllegalSymbolException if <code>nucleotide</code> isn't in the DNA alphabet
078     *  @return an array of integers representing the values of a particular 
079     *          chromatogram trace.
080     */
081    public int[] getTrace(AtomicSymbol nucleotide) throws IllegalSymbolException;
082    
083    /** 
084     * Returns the number of bits of the traces which are significant.  For 
085     * instance, if the chromatogram were originally encoded with a single byte 
086     * per trace sample, this method must return 8.
087     * @return the number of significant bits
088     */
089    public int getSignificantBits();
090
091    /**
092     * Returns an alignment that describes the base calls for this chromatogram.
093     * All of the <code>SymbolList</code>s in this alignment must be the same
094     * length and that length must equal {@link #getSequenceLength}.
095     * <p>
096     * The alignment must contain, at the least, two sequences:
097     * </p>
098     * <ol>
099     *   <li>A sequence containing the called bases.  The alphabet of this list
100     *       must be {@link org.biojava.bio.seq.DNATools#getDNA()}.  
101     *       The label for this list in the alignment must be 
102     *       <code>Chromatogram.DNA</code></li>
103     *   <li>A sequence containing the trace offsets at which the called bases 
104     *       were called.  The alphabet of this list must be an 
105     *       {@link org.biojava.bio.symbol.IntegerAlphabet} or a
106     *       {@link org.biojava.bio.symbol.IntegerAlphabet.SubIntegerAlphabet}.
107     *       The label for this list in the alignment must be 
108     *       <code>Chromatogram.OFFSETS</code>.</li>
109     * </ol>
110     * <p>
111     * Implementors may provide other sequences as they see fit.
112     * </p>
113     * @return an alignment of at least two sequences, as described above.
114     */
115    public Alignment getBaseCalls();
116    /** 
117     * Returns the number of bases called by whatever base-calling software
118     * analyzed the chromatogram as loaded.  Must equal 
119     * <code>{@link #getBaseCalls}.length()</code>.
120     * @return the number of bases
121     */
122    public int getSequenceLength();
123    
124    /** 
125     * Returns a new <code>Chromatogram</code> representing the reverse
126     * complement of this one.
127     * <p>
128     * Implementors should copy the metadata about the chromatogram (i.e., base 
129     * calls) as is appropriate to their formats.
130     * </p>
131     * @return a new chromatogram that is the reverse complement of this one
132     */
133    public Chromatogram reverseComplement();
134}