001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.io;
022
023import java.io.FileReader;
024import java.io.IOException;
025import java.io.InputStreamReader;
026import java.io.Reader;
027
/**
 * A buffered character reader that also keeps a running count of how much
 * input has been consumed, so that callers can record offsets into the source
 * while still getting the performance of buffered reads. The implementation
 * is the JDK <code>BufferedReader</code> source with the counting added.
 *
 * <p> <b>What is counted.</b> The counter advances by one per <i>character</i>
 * consumed, so it equals a byte offset only when the underlying encoding uses
 * one byte per character. Line terminators consumed by {@link #readLine()}
 * are included. A carriage return is counted as two characters at the moment
 * it is seen, on the assumption that it is the first half of a CR-LF pair;
 * the line feed that follows is then skipped without being counted again.
 * This keeps {@link #getBytesRead()} pointing at the start of the next line
 * as soon as <code>readLine()</code> returns. The consequence is that input
 * using a bare carriage return as its line terminator is over-counted by one
 * per line.
 *
 * <p> ---------- original buffered reader ----------------------<br>
 * Reads text from a character-input stream, buffering characters so as to
 * provide for the efficient reading of characters, arrays, and lines.
 *
 * <p> The buffer size may be specified, or the default size may be used.  The
 * default is large enough for most purposes.
 *
 * <p> In general, each read request made of a Reader causes a corresponding
 * read request to be made of the underlying character or byte stream.  It is
 * therefore advisable to wrap a BufferedReaderBytesRead around any Reader
 * whose read() operations may be costly, such as FileReaders and
 * InputStreamReaders.  For example,
 *
 * <pre>
 * BufferedReaderBytesRead in
 *   = new BufferedReaderBytesRead(new FileReader("foo.in"));
 * </pre>
 *
 * will buffer the input from the specified file.  Without buffering, each
 * invocation of read() or readLine() could cause bytes to be read from the
 * file, converted into characters, and then returned, which can be very
 * inefficient.
 *
 * <p> Programs that use DataInputStreams for textual input can be localized by
 * replacing each DataInputStream with an appropriate BufferedReaderBytesRead.
 *
 * @see FileReader
 * @see InputStreamReader
 *
 * @version     1.37, 06/03/15
 * @author      Mark Reinhold
 * @author Scooter Willis &lt;willishf at gmail dot com&gt;
 * @since       JDK1.1
 */
public class BufferedReaderBytesRead extends Reader {

        /** The wrapped reader; set to null once this reader is closed. */
        private Reader in;
        /** The character buffer; replaced by a larger one when a mark needs it. */
        private char[] cb;
        /** Number of valid characters in cb, and index of the next one to hand out. */
        private int nChars, nextChar;
        private static final int INVALIDATED = -2;
        private static final int UNMARKED = -1;
        /** Buffer index of the mark, or UNMARKED / INVALIDATED. */
        private int markedChar = UNMARKED;
        private int readAheadLimit = 0; /* Valid only when markedChar > 0 */

        /** If the next character is a line feed, skip it */
        private boolean skipLF = false;
        /** The skipLF flag when the mark was set */
        private boolean markedSkipLF = false;
        private static final int defaultCharBufferSize = 8192;
        private static final int defaultExpectedLineLength = 80;
        /** Running count of characters consumed; see the class comment for the exact rules. */
        long bytesRead = 0;
        /** Value of bytesRead when the mark was set, restored by reset(). */
        private long markedBytesRead = 0;

        /**
         * Creates a buffering character-input stream that uses an input buffer of
         * the specified size.
         *
         * @param  in   A Reader
         * @param  sz   Input-buffer size
         *
         * @exception  IllegalArgumentException  If sz is &lt;= 0
         */
        public BufferedReaderBytesRead(Reader in, int sz) {
                super(in);
                if (sz <= 0) {
                        throw new IllegalArgumentException("Buffer size <= 0");
                }
                this.in = in;
                cb = new char[sz];
                nextChar = nChars = 0;
        }

        /**
         * Creates a buffering character-input stream that uses a default-sized
         * input buffer.
         *
         * @param  in   A Reader
         */
        public BufferedReaderBytesRead(Reader in) {
                this(in, defaultCharBufferSize);
        }

        /**
         * Returns the number of characters consumed from this reader so far.
         *
         * <p> The value includes line terminators. A carriage return seen by
         * {@link #readLine()} is counted as a two-character CR-LF pair straight
         * away, so after <code>readLine()</code> returns the value is the offset
         * of the start of the next line for LF and CR-LF terminated input. The
         * count is in characters and therefore matches a byte offset only for
         * single-byte encodings. {@link #reset()} rewinds the count to its value
         * at the corresponding {@link #mark(int)}.
         *
         * @return the number of characters consumed so far
         */
        public long getBytesRead() {
                return bytesRead;
        }

        /** Checks to make sure that the stream has not been closed */
        private void ensureOpen() throws IOException {
                if (in == null) {
                        throw new IOException("Stream closed");
                }
        }

        /**
         * Fills the input buffer, taking the mark into account if it is valid.
         * Characters between the mark and the current position are preserved
         * (moved to the front of the buffer) so that reset() can still reach them.
         */
        private void fill() throws IOException {
                int dst;
                if (markedChar <= UNMARKED) {
                        /* No mark */
                        dst = 0;
                } else {
                        /* Marked */
                        int delta = nextChar - markedChar;
                        if (delta >= readAheadLimit) {
                                /* Gone past read-ahead limit: Invalidate mark */
                                markedChar = INVALIDATED;
                                readAheadLimit = 0;
                                dst = 0;
                        } else {
                                if (readAheadLimit <= cb.length) {
                                        /* Shuffle in the current buffer */
                                        System.arraycopy(cb, markedChar, cb, 0, delta);
                                } else {
                                        /* Reallocate buffer to accommodate read-ahead limit */
                                        char[] ncb = new char[readAheadLimit];
                                        System.arraycopy(cb, markedChar, ncb, 0, delta);
                                        cb = ncb;
                                }
                                markedChar = 0;
                                dst = delta;
                                nextChar = nChars = delta;
                        }
                }

                int n;
                do {
                        n = in.read(cb, dst, cb.length - dst);
                } while (n == 0);
                if (n > 0) {
                        nChars = dst + n;
                        nextChar = dst;
                }
        }

        /**
         * Reads a single character.
         *
         * @return The character read, as an integer in the range
         *         0 to 65535 (<code>0x00-0xffff</code>), or -1 if the
         *         end of the stream has been reached
         * @exception  IOException  If an I/O error occurs
         */
        @Override
        public int read() throws IOException {
                synchronized (lock) {
                        ensureOpen();
                        for (;;) {
                                if (nextChar >= nChars) {
                                        fill();
                                        if (nextChar >= nChars) {
                                                return -1;
                                        }
                                }
                                if (skipLF) {
                                        skipLF = false;
                                        if (cb[nextChar] == '\n') {
                                                /* Not counted here: readLine already counted this
                                                   line feed when it saw the carriage return. */
                                                nextChar++;
                                                continue;
                                        }
                                }
                                bytesRead++;
                                return cb[nextChar++];
                        }
                }
        }

        /**
         * Reads characters into a portion of an array, reading from the underlying
         * stream if necessary. Does not touch the consumed-character count; the
         * caller adds the returned length.
         */
        private int read1(char[] cbuf, int off, int len) throws IOException {
                if (nextChar >= nChars) {
                        /* If the requested length is at least as large as the buffer, and
                        if there is no mark/reset activity, and if line feeds are not
                        being skipped, do not bother to copy the characters into the
                        local buffer.  In this way buffered streams will cascade
                        harmlessly. */
                        if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                                return in.read(cbuf, off, len);
                        }
                        fill();
                }
                if (nextChar >= nChars) {
                        return -1;
                }
                if (skipLF) {
                        skipLF = false;
                        if (cb[nextChar] == '\n') {
                                /* Already counted together with the preceding carriage return */
                                nextChar++;
                                if (nextChar >= nChars) {
                                        fill();
                                }
                                if (nextChar >= nChars) {
                                        return -1;
                                }
                        }
                }
                int n = Math.min(len, nChars - nextChar);
                System.arraycopy(cb, nextChar, cbuf, off, n);
                nextChar += n;
                return n;
        }

        /**
         * Reads characters into a portion of an array.
         *
         * <p> This method implements the general contract of the corresponding
         * <code>{@link Reader#read(char[], int, int) read}</code> method of the
         * <code>{@link Reader}</code> class.  As an additional convenience, it
         * attempts to read as many characters as possible by repeatedly invoking
         * the <code>read</code> method of the underlying stream.  This iterated
         * <code>read</code> continues until one of the following conditions becomes
         * true: <ul>
         *
         *   <li> The specified number of characters have been read,
         *
         *   <li> The <code>read</code> method of the underlying stream returns
         *   <code>-1</code>, indicating end-of-file, or
         *
         *   <li> The <code>ready</code> method of the underlying stream
         *   returns <code>false</code>, indicating that further input requests
         *   would block.
         *
         * </ul> If the first <code>read</code> on the underlying stream returns
         * <code>-1</code> to indicate end-of-file then this method returns
         * <code>-1</code>.  Otherwise this method returns the number of characters
         * actually read.
         *
         * <p> Subclasses of this class are encouraged, but not required, to
         * attempt to read as many characters as possible in the same fashion.
         *
         * <p> Ordinarily this method takes characters from this stream's character
         * buffer, filling it from the underlying stream as necessary.  If,
         * however, the buffer is empty, the mark is not valid, and the requested
         * length is at least as large as the buffer, then this method will read
         * characters directly from the underlying stream into the given array.
         * Thus redundant <code>BufferedReaderBytesRead</code>s will not copy data
         * unnecessarily.
         *
         * <p> The number of characters returned is added to the value reported by
         * {@link #getBytesRead()}.
         *
         * @param      cbuf  Destination buffer
         * @param      off   Offset at which to start storing characters
         * @param      len   Maximum number of characters to read
         *
         * @return     The number of characters read, or -1 if the end of the
         *             stream has been reached
         *
         * @exception  IOException  If an I/O error occurs
         */
        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
                synchronized (lock) {
                        ensureOpen();
                        if ((off < 0) || (off > cbuf.length) || (len < 0)
                                        || ((off + len) > cbuf.length) || ((off + len) < 0)) {
                                throw new IndexOutOfBoundsException();
                        } else if (len == 0) {
                                return 0;
                        }

                        int n = read1(cbuf, off, len);
                        if (n <= 0) {
                                return n;
                        }
                        while ((n < len) && in.ready()) {
                                int n1 = read1(cbuf, off + n, len - n);
                                if (n1 <= 0) {
                                        break;
                                }
                                n += n1;
                        }
                        bytesRead += n;
                        return n;
                }
        }

        /**
         * Reads a line of text.  A line is considered to be terminated by any one
         * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
         * followed immediately by a linefeed.
         *
         * <p> This method counts only the line terminator; the public
         * {@link #readLine()} adds the length of the returned text.
         *
         * @param      ignoreLF  If true, the next '\n' will be skipped
         *
         * @return     A String containing the contents of the line, not including
         *             any line-termination characters, or null if the end of the
         *             stream has been reached
         *
         * @see        java.io.LineNumberReader#readLine()
         *
         * @exception  IOException  If an I/O error occurs
         */
        private String readLine(boolean ignoreLF) throws IOException {
                StringBuilder s = null;
                int startChar;

                synchronized (lock) {
                        ensureOpen();
                        boolean omitLF = ignoreLF || skipLF;

                        for (;;) {

                                if (nextChar >= nChars) {
                                        fill();
                                }
                                if (nextChar >= nChars) { /* EOF */
                                        if (s != null && s.length() > 0) {
                                                return s.toString();
                                        }
                                        return null;
                                }
                                boolean eol = false;
                                char c = 0;
                                int i;

                                /* Skip a leftover '\n', if necessary */
                                if (omitLF && (cb[nextChar] == '\n')) {
                                        nextChar++;
                                        /* A line feed that follows a carriage return was counted
                                           when the carriage return was seen.  Only a line feed
                                           skipped purely because the caller asked for it has
                                           not been counted yet. */
                                        if (!skipLF) {
                                                bytesRead++;
                                        }
                                }
                                skipLF = false;
                                omitLF = false;

                                for (i = nextChar; i < nChars; i++) {
                                        c = cb[i];
                                        if ((c == '\n') || (c == '\r')) {
                                                /* Count the terminator character itself */
                                                bytesRead++;
                                                eol = true;
                                                break;
                                        }
                                }

                                startChar = nextChar;
                                nextChar = i;

                                if (eol) {
                                        String str;
                                        if (s == null) {
                                                str = new String(cb, startChar, i - startChar);
                                        } else {
                                                s.append(cb, startChar, i - startChar);
                                                str = s.toString();
                                        }
                                        nextChar++;
                                        if (c == '\r') {
                                                /* Assume CR-LF and count the line feed now, so the
                                                   reported offset is already at the start of the
                                                   next line; the line feed is skipped, uncounted,
                                                   by whichever read comes next. */
                                                bytesRead++;
                                                skipLF = true;
                                        }

                                        return str;
                                }

                                /* No terminator in this buffer: keep what we have and refill */
                                if (s == null) {
                                        s = new StringBuilder(defaultExpectedLineLength);
                                }
                                s.append(cb, startChar, i - startChar);

                        }
                }
        }

        /**
         * Reads a line of text.  A line is considered to be terminated by any one
         * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
         * followed immediately by a linefeed.
         *
         * <p> The length of the returned line plus its terminator is added to the
         * value reported by {@link #getBytesRead()}; a carriage return is counted
         * as a two-character CR-LF terminator.
         *
         * @return     A String containing the contents of the line, not including
         *             any line-termination characters, or null if the end of the
         *             stream has been reached
         *
         * @exception  IOException  If an I/O error occurs
         */
        public String readLine() throws IOException {
                /* Hold the lock across both steps so the terminator count and the
                   text count are applied together. */
                synchronized (lock) {
                        String line = readLine(false);
                        if (line != null) {
                                bytesRead += line.length();
                        }
                        return line;
                }
        }

        /**
         * Skips characters.  The number of characters skipped is added to the
         * value reported by {@link #getBytesRead()}.
         *
         * @param  n  The number of characters to skip
         *
         * @return    The number of characters actually skipped
         *
         * @exception  IllegalArgumentException  If <code>n</code> is negative.
         * @exception  IOException  If an I/O error occurs
         */
        @Override
        public long skip(long n) throws IOException {
                if (n < 0L) {
                        throw new IllegalArgumentException("skip value is negative");
                }
                synchronized (lock) {
                        ensureOpen();
                        long r = n;
                        while (r > 0) {
                                if (nextChar >= nChars) {
                                        fill();
                                }
                                if (nextChar >= nChars) /* EOF */ {
                                        break;
                                }
                                if (skipLF) {
                                        skipLF = false;
                                        if (cb[nextChar] == '\n') {
                                                /* Already counted with the preceding carriage return */
                                                nextChar++;
                                        }
                                }
                                long d = (long) nChars - nextChar;
                                if (r <= d) {
                                        nextChar += r;
                                        r = 0;
                                        break;
                                } else {
                                        r -= d;
                                        nextChar = nChars;
                                }
                        }
                        long skipped = n - r;
                        bytesRead += skipped;
                        return skipped;
                }
        }

        /**
         * Tells whether this stream is ready to be read.  A buffered character
         * stream is ready if the buffer is not empty, or if the underlying
         * character stream is ready.
         *
         * @exception  IOException  If an I/O error occurs
         */
        @Override
        public boolean ready() throws IOException {
                synchronized (lock) {
                        ensureOpen();

                        /*
                         * If newline needs to be skipped and the next char to be read
                         * is a newline character, then just skip it right away.
                         */
                        if (skipLF) {
                                /* Note that in.ready() will return true if and only if the next
                                 * read on the stream will not block.
                                 */
                                if (nextChar >= nChars && in.ready()) {
                                        fill();
                                }
                                if (nextChar < nChars) {
                                        if (cb[nextChar] == '\n') {
                                                nextChar++;
                                        }
                                        skipLF = false;
                                }
                        }
                        return (nextChar < nChars) || in.ready();
                }
        }

        /**
         * Tells whether this stream supports the mark() operation, which it does.
         */
        @Override
        public boolean markSupported() {
                return true;
        }

        /**
         * Marks the present position in the stream.  Subsequent calls to reset()
         * will attempt to reposition the stream to this point.  The current
         * consumed-character count is remembered along with the position.
         *
         * @param readAheadLimit   Limit on the number of characters that may be
         *                         read while still preserving the mark. An attempt
         *                         to reset the stream after reading characters
         *                         up to this limit or beyond may fail.
         *                         A limit value larger than the size of the input
         *                         buffer will cause a new buffer to be allocated
         *                         whose size is no smaller than limit.
         *                         Therefore large values should be used with care.
         *
         * @exception  IllegalArgumentException  If readAheadLimit is &lt; 0
         * @exception  IOException  If an I/O error occurs
         */
        @Override
        public void mark(int readAheadLimit) throws IOException {
                if (readAheadLimit < 0) {
                        throw new IllegalArgumentException("Read-ahead limit < 0");
                }
                synchronized (lock) {
                        ensureOpen();
                        this.readAheadLimit = readAheadLimit;
                        markedChar = nextChar;
                        markedSkipLF = skipLF;
                        markedBytesRead = bytesRead;
                }
        }

        /**
         * Resets the stream to the most recent mark, and rewinds the
         * consumed-character count to the value it had at that mark so that
         * re-read input is not counted twice.
         *
         * @exception  IOException  If the stream has never been marked,
         *                          or if the mark has been invalidated
         */
        @Override
        public void reset() throws IOException {
                synchronized (lock) {
                        ensureOpen();
                        if (markedChar < 0) {
                                throw new IOException((markedChar == INVALIDATED)
                                                ? "Mark invalid"
                                                : "Stream not marked");
                        }
                        nextChar = markedChar;
                        skipLF = markedSkipLF;
                        bytesRead = markedBytesRead;
                }
        }

        /**
         * Closes the underlying reader and releases the buffer.  Closing an
         * already-closed reader has no effect.  This reader is treated as closed
         * even if closing the underlying reader fails.
         *
         * @exception  IOException  If the underlying reader fails to close
         */
        @Override
        public void close() throws IOException {
                synchronized (lock) {
                        if (in == null) {
                                return;
                        }
                        try {
                                in.close();
                        } finally {
                                in = null;
                                cb = null;
                        }
                }
        }
}
572