001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.utils.io; 023 024import java.io.EOFException; 025import java.io.IOException; 026import java.io.InputStream; 027 028/** 029 * A wrapper around {@link java.io.InputStream} that provides in-memory 030 * caching of the input data. This allows it to provide a {@link #seek} 031 * method, which lets the user use an {@link java.io.InputStream} like a 032 * {@link java.io.RandomAccessFile} (with appropriate caveats about memory 033 * footprint, security, and performance). 034 * <p> 035 * This class has not been tested with very long input streams. It might choke. 036 * </p> 037 * 038 * @author Rhett Sutphin (<a href="http://genome.uiowa.edu/">UI CBCB</a>) 039 */ 040public class CachingInputStream extends InputStream implements Seekable { 041 private final static int INIT_CACHE_SIZE = 1024; 042 private final static int RESIZE_FACTOR = 3; 043 044 /** The byte cache itself. */ 045 protected byte[] cache; 046 /** The 0-based index into cache of the _next_ byte to return. If 047 * ptr == validLen, data must be read from the stream into the cache. */ 048 protected int ptr; 049 /** A count of the number of bytes in {@link #cache} that contain 050 * data read from the stream. */ 051 protected int validLen; 052 /** The underlying input stream whose data we're caching */ 053 protected InputStream in; 054 055 public CachingInputStream(InputStream in) { 056 this.in = in; 057 cache = new byte[INIT_CACHE_SIZE]; 058 ptr = validLen = 0; 059 } 060 061 public void seek(long pos) throws IOException { 062 if (pos > Integer.MAX_VALUE || pos < 0) { 063 throw new IllegalArgumentException("Cannot seek to " 064 + pos + ": can only do 0 <= seek < " + Integer.MAX_VALUE); 065 } 066 int newPtr = (int) pos; 067 if (newPtr <= validLen) { 068 ptr = newPtr; 069 } 070 else { 071 skip(newPtr - ptr); 072 } 073 } 074 075 public int read() throws IOException { 076 if (ptr < validLen) { 077 int out = 0xFF & cache[ptr]; 078 ptr++; 079 return out; 080 } 081 else { 082 int read = in.read(); 083 if (read >= 0) { 084 expandCache(1); 085 cache[ptr] = (byte) read; 086 ptr++; 087 } 088 return read; 089 } 090 } 091 092 public int read(byte[] b, int start, int len) throws IOException { 093 int cachedLen = Math.min( Math.max(validLen - ptr, 0) , len ); 094 // copy the cached bytes to b, if any 095 System.arraycopy(cache, ptr, b, start, cachedLen); 096 ptr += cachedLen; 097 // read additional bytes from the stream, if any 098 int bytesRead = in.read(b, start + cachedLen, len - cachedLen); 099 // copy newly read bytes into cache 100 expandCache(bytesRead); 101 System.arraycopy(b, start+cachedLen, cache, ptr, bytesRead); 102 ptr += bytesRead; 103 return bytesRead + cachedLen; 104 } 105 106 // FIXME: assumes ptr == validLen - 1 107 public long skip(long num) throws IOException { 108 if (ptr + num > Integer.MAX_VALUE) 109 return 0; 110 int n = (int) num; 111 // skip through as much cache as exists up to n 112 int availCache = Math.min(validLen - ptr, n); 113 n -= availCache; 114 ptr += availCache; 115 // read any additional "skipped" bytes into cache 116 expandCache(n); 117 int i = 0, count; 118 IOException ioEx = null; 119 try { 120 while (i < n) { 121 count = in.read(cache, ptr + i, n - i); 122 if (count < 0) 123 break; 124 else 125 i += count; 126 } 127 } 128 catch (EOFException e) { } 129 catch (IOException e) { ioEx = e; } 130 // if we couldn't skip enough bytes from the stream, 131 // mark those bytes invalid in the cache 132 validLen -= (n - i); 133 // update the pointer to indicate the skipped bytes 134 ptr += i; 135 // we save and rethrow the IOException in case the user of the code 136 // tries to recover from the IOException -- this way there's a 137 // nonzero chance that ptr and validLen will still be valid 138 if (ioEx != null) 139 throw ioEx; 140 // return the total number of bytes skipped with both methods 141 return i + availCache; 142 } 143 144 /** Expands the cache to hold some number of <code>additionalBytes</code>. 145 * Expansion is done multiplicatively for efficiency. Immediately after 146 * calling this method, you must fill the additional bytes from the stream 147 * because this method also updates validLen. */ 148 protected void expandCache(int additionalBytes) { 149 if (cache.length < validLen + additionalBytes) { 150 int newLen = cache.length; 151 while (newLen < validLen + additionalBytes) 152 newLen *= RESIZE_FACTOR; 153 byte[] newCache = new byte[newLen]; 154 System.arraycopy(cache, 0, newCache, 0, cache.length); 155 cache = newCache; 156 } 157 validLen += additionalBytes; 158 } 159}