001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.bio.symbol;
022
023/**
024 * <p>
025 * An encapsulation of the way symbols map to bit-patterns.
026 * </p>
027 *
028 * <p>
029 * A packing will encapsulate the process of converting between symbols and
030 * bit-patterns. For example, in DNA you could use 00, 01, 10 and 11 to
031 * represent the four bases (a, g, c, t). Many applications may require a
032 * specific packing. You may need to store full ambiguity information, or
033 * perhaps you can discard this capability to reduce storage space. You may
034 * care about the bit-pattern produced because you need interoperability or
035 * an algorithm needs to be fed correctly, or you may not care about the
036 * packing at all. This interface is here to allow you to choose the most
037 * appropriate packing for your task.
038 * </p>
039 *
040 * @author Matthew Pocock
041 */
042public interface Packing {
043  /**
044   * The FiniteAlphabet this packing is for.
045   *
046   * @return  the FiniteAlphabet that we can pack
047   */
048  FiniteAlphabet getAlphabet();
049  
050  /**
051   * <p>
052   * Return a byte representing the packing of a symbol.
053   * The bits will be from {@code 1 << 0} through to {@code 1 << (wordSize() - 1)}.
054   * </p>
055   *
056   * @param sym  the Symbol to pack
057   * @return  a byte containing the packed symbol
058   * @throws IllegalSymbolException if sym is not in getAlphabet().
059   */
060  byte pack(Symbol sym)
061  throws IllegalSymbolException;
062  
063  /**
064   * <p>
065   * Return the symbol for a packing.
066   * </p>
067   *
068   * @param packed  the byte pattern for a Symbol
069   * @return the Symbol that was packed
070   * @throws IllegalSymbolException if the packing doesn't represent a valid
071   *         Symbol
072   */
073  Symbol unpack(byte packed)
074  throws IllegalSymbolException;
075  
076  /**
077   * <p>
078   * The number of bits required to pack a symbol.
079   * </p>
080   *
081   * @return the word size as a byte
082   */
083  byte wordSize();
084  
085  /**
086   * <p>
087   * Flag to state if ambiguities are stored.
088   * </p>
089   *
090   * <p>
091   * Packings are free to either store ambiguity information or to discard
092   * it (presumably converting all ambiguities to a standard AtomicSymbol
093   * and then packing that). You can check whether ambiguities are handled
094   * by calling this method.
095   * </p>
096   *
097   * @return true if ambiguities are stored, false otherwise
098   */
099  boolean handlesAmbiguity();
100}