001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.chromatogram; 023 024import org.biojava.bio.alignment.Alignment; 025import org.biojava.bio.symbol.AtomicSymbol; 026import org.biojava.bio.symbol.IllegalSymbolException; 027 028/** 029 * Encapsulates the basic information you would want from a chromatogram. 030 * Read-only. 031 * 032 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>) 033 * @author Matthew Pocock 034 * @since 1.3 035 */ 036public interface Chromatogram { 037 /** 038 * The sequence label for the list of called bases. 039 */ 040 public static String DNA = "dna"; 041 042 /** 043 * The sequence label for the trace offsets of the called bases. 044 */ 045 public static String OFFSETS = "trace-offsets"; 046 047 /** Gets the max intensity from all the traces. Must be equivalent 048 * to the max of calling {@link #getMax(AtomicSymbol)} on each 049 * of the four non-ambiguous DNA nucleotides. 050 * @return the max intensity 051 */ 052 public int getMax(); 053 /** Gets the max intensity on the trace for the specified nucleotide. 054 * @param nucleotide the trace to examine. Must be a concrete 055 * (non-ambiguous) nucleotide from the DNA alphabet 056 * @throws IllegalSymbolException when the nucleotide isn't from the DNA 057 * alphabet 058 * @return the max intensity 059 */ 060 public int getMax(AtomicSymbol nucleotide) throws IllegalSymbolException; 061 062 /** Returns the length of the trace of the Chromatogram. 063 * @return the number of samples in the trace 064 * @see #getTrace(AtomicSymbol) 065 */ 066 public int getTraceLength(); 067 /** Returns an array containing the intensities of the sampled waveform 068 * representing the chromatogram trace for base <code>nucleotide</code>. 069 * This may be a reference the actual internal representation of the 070 * samples, so callers <b>must not modify it</b>. 071 * <p> 072 * The resulting array for each nucleotide must be {@link #getTraceLength} 073 * <code>int</code>s long. 074 * </p> 075 * @param nucleotide the trace to examine. Must be the symbol for A, C, G, or T 076 * as provided by {@link org.biojava.bio.seq.DNATools} 077 * @throws IllegalSymbolException if <code>nucleotide</code> isn't in the DNA alphabet 078 * @return an array of integers representing the values of a particular 079 * chromatogram trace. 080 */ 081 public int[] getTrace(AtomicSymbol nucleotide) throws IllegalSymbolException; 082 083 /** 084 * Returns the number of bits of the traces which are significant. For 085 * instance, if the chromatogram were originally encoded with a single byte 086 * per trace sample, this method must return 8. 087 * @return the number of significant bits 088 */ 089 public int getSignificantBits(); 090 091 /** 092 * Returns an alignment that describes the base calls for this chromatogram. 093 * All of the <code>SymbolList</code>s in this alignment must be the same 094 * length and that length must equal {@link #getSequenceLength}. 095 * <p> 096 * The alignment must contain, at the least, two sequences: 097 * </p> 098 * <ol> 099 * <li>A sequence containing the called bases. The alphabet of this list 100 * must be {@link org.biojava.bio.seq.DNATools#getDNA()}. 101 * The label for this list in the alignment must be 102 * <code>Chromatogram.DNA</code></li> 103 * <li>A sequence containing the trace offsets at which the called bases 104 * were called. The alphabet of this list must be an 105 * {@link org.biojava.bio.symbol.IntegerAlphabet} or a 106 * {@link org.biojava.bio.symbol.IntegerAlphabet.SubIntegerAlphabet}. 107 * The label for this list in the alignment must be 108 * <code>Chromatogram.OFFSETS</code>.</li> 109 * </ol> 110 * <p> 111 * Implementors may provide other sequences as they see fit. 112 * </p> 113 * @return an alignment of at least two sequences, as described above. 114 */ 115 public Alignment getBaseCalls(); 116 /** 117 * Returns the number of bases called by whatever base-calling software 118 * analyzed the chromatogram as loaded. Must equal 119 * <code>{@link #getBaseCalls}.length()</code>. 120 * @return the number of bases 121 */ 122 public int getSequenceLength(); 123 124 /** 125 * Returns a new <code>Chromatogram</code> representing the reverse 126 * complement of this one. 127 * <p> 128 * Implementors should copy the metadata about the chromatogram (i.e., base 129 * calls) as is appropriate to their formats. 130 * </p> 131 * @return a new chromatogram that is the reverse complement of this one 132 */ 133 public Chromatogram reverseComplement(); 134}