001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.chromatogram;
023
024import java.io.File;
025import java.io.FileInputStream;
026import java.io.IOException;
027import java.io.InputStream;
028
029import org.biojava.bio.program.abi.ABIFChromatogram;
030import org.biojava.bio.program.scf.SCF;
031import org.biojava.utils.io.CachingInputStream;
032
033/**
034 * A factory that creates {@link Chromatogram} objects from files or streams.
035 * In either case, the type of object to create is determined from the first
036 * four bytes of the stream (the magic number).
037 *
038 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>)
039 * @author Matthew Pocock
040 * @since 1.3
041 */
042public class ChromatogramFactory {
043    /**
044     *  The magic number for SCF files.
045     */
046    public static final int SCF_MAGIC = (((byte) '.') << 24)
047                                      + (((byte) 's') << 16)
048                                      + (((byte) 'c') << 8)
049                                      + (((byte) 'f'));
050    /**
051     *  The magic number for ABIF files.
052     */
053    public static final int ABI_MAGIC = (((byte) 'A') << 24)
054                                      + (((byte) 'B') << 16)
055                                      + (((byte) 'I') << 8)
056                                      + (((byte) 'F'));
057
058    /**
059     * Creates a new <code>Chromatogram</code> object from the named file.
060     * @param f the file to read
061     * @return a new Chromatogram
062     *
063     * @throws IOException when the file can't be read or some other I/O error occurs
064     * @throws UnsupportedChromatogramFormatException when the file doesn't
065     *         contain a chromatogram in a supported format
066     */
067    public static Chromatogram create(File f)
068    throws IOException, UnsupportedChromatogramFormatException {
069        FileInputStream fin = new FileInputStream(f);
070        int magic = magicFromStream(fin);
071        fin.close();
072
073        switch (magic) {
074        case SCF_MAGIC:
075            return SCF.create(f);
076        case ABI_MAGIC:
077            return ABIFChromatogram.create(f);
078        default:
079            throw new UnsupportedChromatogramFormatException("File "+f+" with magic "+magic+" has an unsupported format");
080        }
081    }
082
083    /**
084     * Creates a new <code>Chromatogram</code> object from the supplied stream.
085     * Note that for some chromatogram formats, this can be much more
086     * memory-intensive than reading from a file.
087     * <p>
088     * Note also that if the provided stream is a
089     * {@link org.biojava.utils.io.CachingInputStream}, it will be seeked
090     * back to 0 before being passed to the parser.  This is because the
091     * parsers that use <code>CachingInputStream</code> assume that the
092     * "file" starts at 0.
093     * </p>
094     *
095     * @param in the stream from which to read the chromatogram.
096     * @return a new Chromatogram
097     * @throws IOException when there's a problem with the stream
098     * @throws UnsupportedChromatogramFormatException when the file doesn't
099     *         contain a chromatogram in a supported format
100     */
101    public static Chromatogram create(InputStream in)
102    throws IOException, UnsupportedChromatogramFormatException {
103        CachingInputStream cin;
104        if (in instanceof CachingInputStream)
105            cin = (CachingInputStream) in;
106        else
107            cin = new CachingInputStream(in);
108        // parsers assume that the image of the file in the stream starts at
109        // the beginning of the stream-as-provided.  If the stream
110        // was a CachingInputStream, it needs to go to zero.
111        cin.seek(0);
112        int magic = magicFromStream(cin);
113        cin.seek(0);
114        switch (magic) {
115        case SCF_MAGIC:
116            // for SCF, we don't need the cache, so don't use it
117            return SCF.create(in, 4);
118        case ABI_MAGIC:
119            return ABIFChromatogram.create(cin);
120        default:
121            throw new UnsupportedChromatogramFormatException("The provided input stream with magic "+magic+" has an unsupported format");
122        }
123
124    }
125
126  /**
127   * Extract the magic number as an integer from a byte-array.
128   *
129   * <p>
130   * This assumes the magic array has at least 4 elements.
131   * </p>
132   *
133   * @param magic  the byte array of magic values
134   * @return the magic number integer
135   */
136    private static int makeMagic(byte[] magic) {
137        return (magic[0] << 24) | (magic[1] << 16) | (magic[2] << 8) | (magic[3]);
138    }
139
140    /**
141     * Reads the next four bytes from a stream to build a 32-bit magic number.
142     *
143     * @param src the source InputStream
144     * @return an integer representing the magic number
145     * @throws IOException if data could not be read from src
146     */
147    private static int magicFromStream(InputStream src) throws IOException {
148        byte[] magicBytes = new byte[4];
149        src.read(magicBytes);
150        return makeMagic(magicBytes);
151    }
152}