001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.sequence.io; 022 023import java.io.FileReader; 024import java.io.IOException; 025import java.io.InputStreamReader; 026import java.io.Reader; 027 028/** 029 * Need to keep track of actual bytes read and take advantage of buffered reader 030 * performance. Took java source for BufferedReader and added BytesRead functionality<br> 031 * ---------- original buffered reader ----------------------<BR> 032 * Reads text from a character-input stream, buffering characters so as to 033 * provide for the efficient reading of characters, arrays, and lines. 034 * 035 * <p> The buffer size may be specified, or the default size may be used. The 036 * default is large enough for most purposes. 037 * 038 * <p> In general, each read request made of a Reader causes a corresponding 039 * read request to be made of the underlying character or byte stream. It is 040 * therefore advisable to wrap a BufferedReaderBytesRead around any Reader whose read() 041 * operations may be costly, such as FileReaders and InputStreamReaders. For 042 * example, 043 * 044 * <pre> 045 * BufferedReaderBytesRead in 046 * = new BufferedReaderBytesRead(new FileReader("foo.in")); 047 * </pre> 048 * 049 * will buffer the input from the specified file. Without buffering, each 050 * invocation of read() or readLine() could cause bytes to be read from the 051 * file, converted into characters, and then returned, which can be very 052 * inefficient. 053 * 054 * <p> Programs that use DataInputStreams for textual input can be localized by 055 * replacing each DataInputStream with an appropriate BufferedReaderBytesRead. 056 * 057 * @see FileReader 058 * @see InputStreamReader 059 * 060 * @version 1.37, 06/03/15 061 * @author Mark Reinhold 062 * @author Scooter Willis <willishf at gmail dot com> 063 * @since JDK1.1 064 */ 065public class BufferedReaderBytesRead extends Reader { 066 067 private Reader in; 068 private char[] cb; 069 private int nChars, nextChar; 070 private static final int INVALIDATED = -2; 071 private static final int UNMARKED = -1; 072 private int markedChar = UNMARKED; 073 private int readAheadLimit = 0; /* Valid only when markedChar > 0 */ 074 075 /** If the next character is a line feed, skip it */ 076 private boolean skipLF = false; 077 /** The skipLF flag when the mark was set */ 078 private boolean markedSkipLF = false; 079 private static int defaultCharBufferSize = 8192; 080 private static int defaultExpectedLineLength = 80; 081 long bytesRead = 0; 082 083 /** 084 * Creates a buffering character-input stream that uses an input buffer of 085 * the specified size. 086 * 087 * @param in A Reader 088 * @param sz Input-buffer size 089 * 090 * @exception IllegalArgumentException If sz is <= 0 091 */ 092 public BufferedReaderBytesRead(Reader in, int sz) { 093 super(in); 094 if (sz <= 0) { 095 throw new IllegalArgumentException("Buffer size <= 0"); 096 } 097 this.in = in; 098 cb = new char[sz]; 099 nextChar = nChars = 0; 100 } 101 102 /** 103 * Creates a buffering character-input stream that uses a default-sized 104 * input buffer. 105 * 106 * @param in A Reader 107 */ 108 public BufferedReaderBytesRead(Reader in) { 109 this(in, defaultCharBufferSize); 110 } 111 112 /** 113 * Keep track of bytesread via ReadLine to account for CR-LF in the stream. Does not keep track of position if 114 * use methods other than ReadLine. 115 * //TODO should override other methods and throw exception or keep track of bytes read 116 * @return 117 */ 118 public long getBytesRead() { 119 return bytesRead; 120 } 121 122 /** Checks to make sure that the stream has not been closed */ 123 private void ensureOpen() throws IOException { 124 if (in == null) { 125 throw new IOException("Stream closed"); 126 } 127 } 128 129 /** 130 * Fills the input buffer, taking the mark into account if it is valid. 131 */ 132 private void fill() throws IOException { 133 int dst; 134 if (markedChar <= UNMARKED) { 135 /* No mark */ 136 dst = 0; 137 } else { 138 /* Marked */ 139 int delta = nextChar - markedChar; 140 if (delta >= readAheadLimit) { 141 /* Gone past read-ahead limit: Invalidate mark */ 142 markedChar = INVALIDATED; 143 readAheadLimit = 0; 144 dst = 0; 145 } else { 146 if (readAheadLimit <= cb.length) { 147 /* Shuffle in the current buffer */ 148 System.arraycopy(cb, markedChar, cb, 0, delta); 149 markedChar = 0; 150 dst = delta; 151 } else { 152 /* Reallocate buffer to accommodate read-ahead limit */ 153 char[] ncb = new char[readAheadLimit]; 154 System.arraycopy(cb, markedChar, ncb, 0, delta); 155 cb = ncb; 156 markedChar = 0; 157 dst = delta; 158 } 159 nextChar = nChars = delta; 160 } 161 } 162 163 int n; 164 do { 165 n = in.read(cb, dst, cb.length - dst); 166 } while (n == 0); 167 if (n > 0) { 168 nChars = dst + n; 169 nextChar = dst; 170 } 171 } 172 173 /** 174 * Reads a single character. 175 * 176 * @return The character read, as an integer in the range 177 * 0 to 65535 (<tt>0x00-0xffff</tt>), or -1 if the 178 * end of the stream has been reached 179 * @exception IOException If an I/O error occurs 180 */ 181 @Override 182 public int read() throws IOException { 183 synchronized (lock) { 184 ensureOpen(); 185 for (;;) { 186 if (nextChar >= nChars) { 187 fill(); 188 if (nextChar >= nChars) { 189 return -1; 190 } 191 } 192 if (skipLF) { 193 skipLF = false; 194 if (cb[nextChar] == '\n') { 195 bytesRead++; 196 nextChar++; 197 continue; 198 } 199 } 200 bytesRead++; 201 return cb[nextChar++]; 202 } 203 } 204 } 205 206 /** 207 * Reads characters into a portion of an array, reading from the underlying 208 * stream if necessary. 209 */ 210 private int read1(char[] cbuf, int off, int len) throws IOException { 211 if (nextChar >= nChars) { 212 /* If the requested length is at least as large as the buffer, and 213 if there is no mark/reset activity, and if line feeds are not 214 being skipped, do not bother to copy the characters into the 215 local buffer. In this way buffered streams will cascade 216 harmlessly. */ 217 if (len >= cb.length && markedChar <= UNMARKED && !skipLF) { 218 return in.read(cbuf, off, len); 219 } 220 fill(); 221 } 222 if (nextChar >= nChars) { 223 return -1; 224 } 225 if (skipLF) { 226 skipLF = false; 227 if (cb[nextChar] == '\n') { 228 nextChar++; 229 if (nextChar >= nChars) { 230 fill(); 231 } 232 if (nextChar >= nChars) { 233 return -1; 234 } 235 } 236 } 237 int n = Math.min(len, nChars - nextChar); 238 System.arraycopy(cb, nextChar, cbuf, off, n); 239 nextChar += n; 240 return n; 241 } 242 243 /** 244 * Reads characters into a portion of an array. 245 * 246 * <p> This method implements the general contract of the corresponding 247 * <code>{@link Reader#read(char[], int, int) read}</code> method of the 248 * <code>{@link Reader}</code> class. As an additional convenience, it 249 * attempts to read as many characters as possible by repeatedly invoking 250 * the <code>read</code> method of the underlying stream. This iterated 251 * <code>read</code> continues until one of the following conditions becomes 252 * true: <ul> 253 * 254 * <li> The specified number of characters have been read, 255 * 256 * <li> The <code>read</code> method of the underlying stream returns 257 * <code>-1</code>, indicating end-of-file, or 258 * 259 * <li> The <code>ready</code> method of the underlying stream 260 * returns <code>false</code>, indicating that further input requests 261 * would block. 262 * 263 * </ul> If the first <code>read</code> on the underlying stream returns 264 * <code>-1</code> to indicate end-of-file then this method returns 265 * <code>-1</code>. Otherwise this method returns the number of characters 266 * actually read. 267 * 268 * <p> Subclasses of this class are encouraged, but not required, to 269 * attempt to read as many characters as possible in the same fashion. 270 * 271 * <p> Ordinarily this method takes characters from this stream's character 272 * buffer, filling it from the underlying stream as necessary. If, 273 * however, the buffer is empty, the mark is not valid, and the requested 274 * length is at least as large as the buffer, then this method will read 275 * characters directly from the underlying stream into the given array. 276 * Thus redundant <code>BufferedReaderBytesRead</code>s will not copy data 277 * unnecessarily. 278 * 279 * @param cbuf Destination buffer 280 * @param off Offset at which to start storing characters 281 * @param len Maximum number of characters to read 282 * 283 * @return The number of characters read, or -1 if the end of the 284 * stream has been reached 285 * 286 * @exception IOException If an I/O error occurs 287 */ 288 @Override 289 public int read(char[] cbuf, int off, int len) throws IOException { 290 synchronized (lock) { 291 ensureOpen(); 292 if ((off < 0) || (off > cbuf.length) || (len < 0) 293 || ((off + len) > cbuf.length) || ((off + len) < 0)) { 294 throw new IndexOutOfBoundsException(); 295 } else if (len == 0) { 296 return 0; 297 } 298 299 int n = read1(cbuf, off, len); 300 if (n <= 0) { 301 return n; 302 } 303 while ((n < len) && in.ready()) { 304 int n1 = read1(cbuf, off + n, len - n); 305 if (n1 <= 0) { 306 break; 307 } 308 n += n1; 309 } 310 bytesRead = bytesRead + n; 311 return n; 312 } 313 } 314 315 /** 316 * Reads a line of text. A line is considered to be terminated by any one 317 * of a line feed ('\n'), a carriage return ('\r'), or a carriage return 318 * followed immediately by a linefeed. 319 * 320 * @param ignoreLF If true, the next '\n' will be skipped 321 * 322 * @return A String containing the contents of the line, not including 323 * any line-termination characters, or null if the end of the 324 * stream has been reached 325 * 326 * @see java.io.LineNumberReader#readLine() 327 * 328 * @exception IOException If an I/O error occurs 329 */ 330 @SuppressWarnings("unused") 331 private String readLine(boolean ignoreLF) throws IOException { 332 StringBuffer s = null; 333 int startChar; 334 335 synchronized (lock) { 336 ensureOpen(); 337 boolean omitLF = ignoreLF || skipLF; 338 339 bufferLoop: 340 for (;;) { 341 342 if (nextChar >= nChars) { 343 fill(); 344 } 345 if (nextChar >= nChars) { /* EOF */ 346 if (s != null && s.length() > 0) { 347 348 return s.toString(); 349 } else { 350 return null; 351 } 352 } 353 boolean eol = false; 354 char c = 0; 355 int i; 356 357 /* Skip a leftover '\n', if necessary */ 358 if (omitLF && (cb[nextChar] == '\n')) { 359 nextChar++; 360 bytesRead++; 361 } 362 skipLF = false; 363 omitLF = false; 364 365 charLoop: 366 for (i = nextChar; i < nChars; i++) { 367 c = cb[i]; 368 if ((c == '\n') || (c == '\r')) { 369 bytesRead++; 370 eol = true; 371 break charLoop; 372 } 373 } 374 375 startChar = nextChar; 376 nextChar = i; 377 378 if (eol) { 379 String str; 380 if (s == null) { 381 str = new String(cb, startChar, i - startChar); 382 } else { 383 s.append(cb, startChar, i - startChar); 384 str = s.toString(); 385 } 386 nextChar++; 387 if (c == '\r') { 388 bytesRead++; 389 skipLF = true; 390 } 391 392 return str; 393 } 394 395 if (s == null) { 396 s = new StringBuffer(defaultExpectedLineLength); 397 } 398 s.append(cb, startChar, i - startChar); 399 400 } 401 } 402 } 403 404 /** 405 * Reads a line of text. A line is considered to be terminated by any one 406 * of a line feed ('\n'), a carriage return ('\r'), or a carriage return 407 * followed immediately by a linefeed. 408 * 409 * @return A String containing the contents of the line, not including 410 * any line-termination characters, or null if the end of the 411 * stream has been reached 412 * 413 * @exception IOException If an I/O error occurs 414 */ 415 public String readLine() throws IOException { 416 String line = readLine(false); 417 if (line != null) { 418 bytesRead = bytesRead + line.length(); 419 } 420 return line; 421 } 422 423 /** 424 * Skips characters. 425 * 426 * @param n The number of characters to skip 427 * 428 * @return The number of characters actually skipped 429 * 430 * @exception IllegalArgumentException If <code>n</code> is negative. 431 * @exception IOException If an I/O error occurs 432 */ 433 @Override 434 public long skip(long n) throws IOException { 435 if (n < 0L) { 436 throw new IllegalArgumentException("skip value is negative"); 437 } 438 synchronized (lock) { 439 ensureOpen(); 440 long r = n; 441 while (r > 0) { 442 if (nextChar >= nChars) { 443 fill(); 444 } 445 if (nextChar >= nChars) /* EOF */ { 446 break; 447 } 448 if (skipLF) { 449 skipLF = false; 450 if (cb[nextChar] == '\n') { 451 nextChar++; 452 } 453 } 454 long d = nChars - nextChar; 455 if (r <= d) { 456 nextChar += r; 457 r = 0; 458 break; 459 } else { 460 r -= d; 461 nextChar = nChars; 462 } 463 } 464 bytesRead = bytesRead + (n - r); 465 return n - r; 466 } 467 } 468 469 /** 470 * Tells whether this stream is ready to be read. A buffered character 471 * stream is ready if the buffer is not empty, or if the underlying 472 * character stream is ready. 473 * 474 * @exception IOException If an I/O error occurs 475 */ 476 @Override 477 public boolean ready() throws IOException { 478 synchronized (lock) { 479 ensureOpen(); 480 481 /* 482 * If newline needs to be skipped and the next char to be read 483 * is a newline character, then just skip it right away. 484 */ 485 if (skipLF) { 486 /* Note that in.ready() will return true if and only if the next 487 * read on the stream will not block. 488 */ 489 if (nextChar >= nChars && in.ready()) { 490 fill(); 491 } 492 if (nextChar < nChars) { 493 if (cb[nextChar] == '\n') { 494 nextChar++; 495 } 496 skipLF = false; 497 } 498 } 499 return (nextChar < nChars) || in.ready(); 500 } 501 } 502 503 /** 504 * Tells whether this stream supports the mark() operation, which it does. 505 */ 506 @Override 507 public boolean markSupported() { 508 return true; 509 } 510 511 /** 512 * Marks the present position in the stream. Subsequent calls to reset() 513 * will attempt to reposition the stream to this point. 514 * 515 * @param readAheadLimit Limit on the number of characters that may be 516 * read while still preserving the mark. An attempt 517 * to reset the stream after reading characters 518 * up to this limit or beyond may fail. 519 * A limit value larger than the size of the input 520 * buffer will cause a new buffer to be allocated 521 * whose size is no smaller than limit. 522 * Therefore large values should be used with care. 523 * 524 * @exception IllegalArgumentException If readAheadLimit is < 0 525 * @exception IOException If an I/O error occurs 526 */ 527 @Override 528 public void mark(int readAheadLimit) throws IOException { 529 if (readAheadLimit < 0) { 530 throw new IllegalArgumentException("Read-ahead limit < 0"); 531 } 532 synchronized (lock) { 533 ensureOpen(); 534 this.readAheadLimit = readAheadLimit; 535 markedChar = nextChar; 536 markedSkipLF = skipLF; 537 } 538 } 539 540 /** 541 * Resets the stream to the most recent mark. 542 * 543 * @exception IOException If the stream has never been marked, 544 * or if the mark has been invalidated 545 */ 546 @Override 547 public void reset() throws IOException { 548 synchronized (lock) { 549 ensureOpen(); 550 if (markedChar < 0) { 551 throw new IOException((markedChar == INVALIDATED) 552 ? "Mark invalid" 553 : "Stream not marked"); 554 } 555 nextChar = markedChar; 556 skipLF = markedSkipLF; 557 } 558 } 559 560 @Override 561 public void close() throws IOException { 562 synchronized (lock) { 563 if (in == null) { 564 return; 565 } 566 in.close(); 567 in = null; 568 cb = null; 569 } 570 } 571} 572