001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.utils.io;
023
024import java.io.EOFException;
025import java.io.IOException;
026import java.io.InputStream;
027
028/** 
029 * A wrapper around {@link java.io.InputStream} that provides in-memory
030 * caching of the input data.  This allows it to provide a {@link #seek}
031 * method, which lets the user use an {@link java.io.InputStream} like a 
032 * {@link java.io.RandomAccessFile} (with appropriate caveats about memory 
033 * footprint, security, and performance).
034 * <p>
035 * This class has not been tested with very long input streams.  It might choke.
036 * </p>
037 *
038 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>)
039 */
040public class CachingInputStream extends InputStream implements Seekable {
041    private final static int INIT_CACHE_SIZE = 1024;
042    private final static int RESIZE_FACTOR = 3;
043    
044    /** The byte cache itself. */
045    protected byte[] cache;
046    /** The 0-based index into cache of the _next_ byte to return.  If
047     *  ptr == validLen, data must be read from the stream into the cache. */
048    protected int ptr;
049    /** A count of the number of bytes in {@link #cache} that contain
050     *  data read from the stream. */
051    protected int validLen;
052    /** The underlying input stream whose data we're caching */
053    protected InputStream in;
054    
055    public CachingInputStream(InputStream in) {
056        this.in = in;
057        cache = new byte[INIT_CACHE_SIZE];
058        ptr = validLen = 0;
059    }
060    
061    public void seek(long pos) throws IOException {
062        if (pos > Integer.MAX_VALUE || pos < 0) {
063            throw new IllegalArgumentException("Cannot seek to " 
064                    + pos + ": can only do 0 <= seek < " + Integer.MAX_VALUE);
065        }
066        int newPtr = (int) pos;
067        if (newPtr <= validLen) {
068            ptr = newPtr;
069        }
070        else {
071            skip(newPtr - ptr);
072        }
073    }
074    
075    public int read() throws IOException {
076        if (ptr < validLen) {
077            int out = 0xFF & cache[ptr];
078            ptr++;
079            return out;
080        }
081        else {
082            int read = in.read();
083            if (read >= 0) {
084                expandCache(1);
085                cache[ptr] = (byte) read;
086                ptr++;
087            }
088            return read;
089        }
090    }
091    
092    public int read(byte[] b, int start, int len) throws IOException {
093        int cachedLen = Math.min( Math.max(validLen - ptr, 0) , len );
094        // copy the cached bytes to b, if any
095        System.arraycopy(cache, ptr, b, start, cachedLen);
096        ptr += cachedLen;
097        // read additional bytes from the stream, if any
098        int bytesRead = in.read(b, start + cachedLen, len - cachedLen);
099        // copy newly read bytes into cache
100        expandCache(bytesRead);
101        System.arraycopy(b, start+cachedLen, cache, ptr, bytesRead);
102        ptr += bytesRead;
103        return bytesRead + cachedLen;
104    }
105    
106    // FIXME: assumes ptr == validLen - 1
107    public long skip(long num) throws IOException {
108        if (ptr + num > Integer.MAX_VALUE)
109            return 0;
110        int n = (int) num;
111        // skip through as much cache as exists up to n
112        int availCache = Math.min(validLen - ptr, n);
113        n -= availCache;
114        ptr += availCache;
115        // read any additional "skipped" bytes into cache
116        expandCache(n);
117        int i = 0, count;
118        IOException ioEx = null;
119        try {
120            while (i < n) {
121                count = in.read(cache, ptr + i, n - i);
122                if (count < 0)
123                    break;
124                else
125                    i += count;
126            }
127        }
128        catch (EOFException e) { }
129        catch (IOException e) { ioEx = e; }
130        // if we couldn't skip enough bytes from the stream, 
131        // mark those bytes invalid in the cache
132        validLen -= (n - i);
133        // update the pointer to indicate the skipped bytes
134        ptr += i;
135        // we save and rethrow the IOException in case the user of the code
136        // tries to recover from the IOException -- this way there's a 
137        // nonzero chance that ptr and validLen will still be valid
138        if (ioEx != null)
139            throw ioEx;
140        // return the total number of bytes skipped with both methods
141        return i + availCache;
142    }
143    
144    /** Expands the cache to hold some number of <code>additionalBytes</code>.
145     *  Expansion is done multiplicatively for efficiency. Immediately after
146     *  calling this method, you must fill the additional bytes from the stream
147     *  because this method also updates validLen.  */
148    protected void expandCache(int additionalBytes) {
149        if (cache.length < validLen + additionalBytes) {
150            int newLen = cache.length;
151            while (newLen < validLen + additionalBytes)
152                newLen *= RESIZE_FACTOR;
153            byte[] newCache = new byte[newLen];
154            System.arraycopy(cache, 0, newCache, 0, cache.length);
155            cache = newCache;
156        }
157        validLen += additionalBytes;
158    }
159}