/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 05-04-2018
 */
022
023package org.biojava.nbio.core.sequence.io;
024
025import java.io.IOException;
026import java.io.File;
027import java.io.BufferedInputStream;
028import java.io.ByteArrayOutputStream;
029import java.io.FileInputStream;
030import java.io.DataInputStream;
031import java.io.ByteArrayInputStream;
032import java.awt.Color;
033import java.awt.Graphics2D;
034import java.awt.image.BufferedImage;
035import java.net.URL;
036import java.io.InputStream;
037
038import org.biojava.nbio.core.sequence.compound.ABITracerCompoundSet;
039import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
040import org.biojava.nbio.core.sequence.template.AbstractSequence;
041import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
042
043/**
044 * Title: ABITrace<p><p>
045 * ABITrace is a class for managing ABI file information,
046 * it is capable of opening an ABI file and storing
047 * the most important fields, which can be recalled as simple java types. It can also return
048 * an image corresponding to the trace.
049 * It has three constructors with input types <code>File, URL, and byte[]</code>.<p><p>
050 * ABI files contain two sets of basecall and sequence data, one that was originally
051 * created programatically and the other, which is an editable copy. This version of this object
052 * only references the original unedited data.<p>
053 */
054public class ABITrace {
055
056        //the next three lines are the important persistent data
057        private String sequence;
058        private int A[], G[], C[], T[], baseCalls[], qCalls[];
059        private int traceLength, seqLength;
060
061        //This is the actual file data.
062        private byte[] traceData;
063
064        //the next four declaration lines comprise the file index information
065        private int macJunk = 0; //sometimes when macintosh files are
066        //FTPed in binary form, they have 128 bytes
067        //of crap pre-pended to them. This constant
068        //allows ABITrace to handle that in a way that
069        //is invisible to the user.
070        private static final int absIndexBase = 26; //The file location of the Index pointer
071        private int PLOC, PCON;
072
073        //the next declaration is for the actual file pointers
074        private int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO;
075
076        /**
077         * The File constructor opens a local ABI file and parses the content.
078         *
079         * @param ABIFile is a <code>java.io.File</code> on the local file system.
080         * @throws IOException              if there is a problem reading the file.
081         * @throws IllegalArgumentException if the file is not a valid ABI file.
082         */
083        public ABITrace(File ABIFile) throws IOException
084        {
085                FileInputStream fis = new FileInputStream(ABIFile);
086                BufferedInputStream bis = new BufferedInputStream(fis);
087                ABITraceInit(bis);
088                fis.close();
089        }
090
091        /**
092         * The URL constructor opens an ABI file from any URL.
093         *
094         * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file.
095         * @throws IOException if there is a problem reading from the URL.
096         * @throws IllegalArgumentException if the URL does not contain a valid ABI file.
097         */
098        public ABITrace( URL ABIFile ) throws IOException
099        {
100                InputStream is = ABIFile.openStream();
101                BufferedInputStream bis = new BufferedInputStream(is);
102                ABITraceInit(bis);
103                is.close();
104        }
105
106        /**
107         * Helper method for constructors
108         *
109         * @param bis - BufferedInputStream
110         * @throws IOException if there is a problem reading from the BufferedInputStream
111         */
112        private void ABITraceInit(BufferedInputStream bis) throws IOException{
113                byte[] bytes = null;
114                ByteArrayOutputStream baos = new ByteArrayOutputStream();
115                int b;
116                while ((b = bis.read()) >= 0)
117                {
118                        baos.write(b);
119                }
120                bis.close(); baos.close();
121                bytes = baos.toByteArray();
122                initData(bytes);
123        }
124
125        /**
126         * The <code>byte[]</code> constructor parses an ABI file represented as a byte array.
127         *
128         * @param  ABIFileData - byte array
129         * @throws IllegalArgumentException if the data does not represent a valid ABI file.
130         */
131        public ABITrace(byte[] ABIFileData) {
132                initData(ABIFileData);
133        }
134
135        /**
136         * Returns the length of the sequence (number of bases) in this trace.
137         *
138         * @return int seqLength
139         */
140        public int getSequenceLength() {
141                return seqLength;
142        }
143
144        /**
145         * Returns the length of the trace (number of x-coordinate points in the graph).
146         *
147         * @return int traceLength
148         */
149        public int getTraceLength() {
150                return traceLength;
151        }
152
153        /**
154         * Returns an <code>int[]</code> array that represents the basecalls - each int in the
155         * array corresponds to an x-coordinate point in the graph that is a peak (a base location).
156         *
157         * @return int[] Basecalls
158         */
159        public int[] getBasecalls() {
160                return baseCalls;
161        }
162
163        /**
164         * Returns an <code>int[]</code> array that represents the quality - each int in the
165         * array corresponds to an quality value 90-255) in the graph at a base location).
166         *
167         * @return int[] qCalls
168         */
169        public int[] getQcalls() {
170                return qCalls;
171        }
172
173        /**
174         * Returns the original programmatically determined (unedited) sequence as a <code>AbstractSequence<NucleotideCompound></code>.
175         *
176         * @return AbstractSequence<NucleotideCompound> sequence
177         */
178        public AbstractSequence<NucleotideCompound> getSequence() throws CompoundNotFoundException {
179                DNASequenceCreator creator = new DNASequenceCreator(ABITracerCompoundSet.getABITracerCompoundSet());
180                return creator.getSequence(sequence, 0);
181        }
182
183        /**
184         * Returns one of the four traces - all of the y-coordinate values,
185         * each of which correspond to a single x-coordinate relative to the
186         * position in the array, so that if element 4 in the array is 972, then
187         * x is 4 and y is 972 for that point.
188         *
189         * @param base - the DNA String to retrieve the trace values for
190         * @return an array of ints giving the entire trace for that base
191         * @throws CompoundNotFoundException if the base is not valid
192         */
193        public int[] getTrace (String base) throws CompoundNotFoundException {
194                if (base.equals("A")) {
195                        return A;
196                } else if (base.equals("C")) {
197                        return C;
198                } else if (base.equals("G")) {
199                        return G;
200                } else if (base.equals("T")) {
201                        return T;
202                } else {
203                        throw new CompoundNotFoundException("Don't know base: " + base);
204                }
205        }
206
207        /**
208         * Returns a BufferedImage that represents the entire trace. The height can be set precisely in
209         * pixels, the width in pixels is determined by the scaling factor times the number
210         * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented
211         * in the returned image.
212         *
213         * @param imageHeight - desired height of the image in pixels.
214         * @param widthScale - how many horizontal pixels to use to represent a single x-coordinate (try 2).
215         * @return BufferedImage image
216         */
217        public BufferedImage getImage(int imageHeight, int widthScale) {
218                BufferedImage out = new BufferedImage(traceLength * widthScale, imageHeight, BufferedImage.TYPE_BYTE_INDEXED);
219                Graphics2D g = out.createGraphics();
220                Color acolor = Color.green.darker();
221                Color ccolor = Color.blue;
222                Color gcolor = Color.black;
223                Color tcolor = Color.red;
224                Color ncolor = Color.pink;
225                double scale = calculateScale(imageHeight);
226                int[] bc = baseCalls;
227                char[] seq = sequence.toCharArray();
228                g.setBackground(Color.white);
229                g.clearRect(0, 0, traceLength * widthScale, imageHeight);
230                int here = 0;
231                int basenum = 0;
232                for (int q = 1; q <= 5; q++) {
233                        for (int x = 0; x <= traceLength - 2; x++) {
234                                if (q == 1) {
235                                        g.setColor(acolor);
236                                        g.drawLine(widthScale * x, transmute(A[x], imageHeight, scale),
237                                                        widthScale * (x + 1), transmute(A[x + 1], imageHeight, scale));
238                                }
239                                if (q == 2) {
240                                        g.setColor(ccolor);
241                                        g.drawLine(widthScale * x, transmute(C[x], imageHeight, scale),
242                                                        widthScale * (x + 1), transmute(C[x + 1], imageHeight, scale));
243                                }
244                                if (q == 3) {
245                                        g.setColor(tcolor);
246                                        g.drawLine(widthScale * x, transmute(T[x], imageHeight, scale),
247                                                        widthScale * (x + 1), transmute(T[x + 1], imageHeight, scale));
248                                }
249                                if (q == 4) {
250                                        g.setColor(gcolor);
251                                        g.drawLine(widthScale * x, transmute(G[x], imageHeight, scale),
252                                                        widthScale * (x + 1), transmute(G[x + 1], imageHeight, scale));
253                                }
254                                if (q == 5) {
255                                        if ((here > bc.length - 1) || (basenum > seq.length - 1)) break;
256                                        if (bc[here] == x) {
257                                                g.drawLine(widthScale * x, transmute(-2, imageHeight, 1.0),
258                                                                widthScale * x, transmute(-7, imageHeight, 1.0));
259                                                if ((basenum + 1) % 10 == 0) //if the basecount is divisible by ten
260                                                //add a number
261                                                {
262                                                        g.drawLine(widthScale * x, transmute(-20, imageHeight, 1.0),
263                                                                        widthScale * x, transmute(-25, imageHeight, 1.0));
264                                                        g.drawString(Integer.toString(basenum + 1),
265                                                                        widthScale * x - 3, transmute(-36, imageHeight, 1.0));
266                                                }
267                                                switch (seq[basenum]) {
268                                                        case 'A':
269                                                        case 'a':
270                                                                g.setColor(acolor);
271                                                                break;
272                                                        case 'C':
273                                                        case 'c':
274                                                                g.setColor(ccolor);
275                                                                break;
276                                                        case 'G':
277                                                        case 'g':
278                                                                g.setColor(gcolor);
279                                                                break;
280                                                        case 'T':
281                                                        case 't':
282                                                                g.setColor(tcolor);
283                                                                break;
284                                                        default:
285                                                                g.setColor(ncolor);
286                                                }
287                                                g.drawChars(seq, basenum, 1,
288                                                                widthScale * x - 3, transmute(-18, imageHeight, 1.0));
289                                                g.setColor(Color.black);
290                                                here++;
291                                                basenum++;
292                                        }
293                                }
294                        }
295                }
296                return out;
297        }
298
299        /**
300         * Utility method to translate y coordinates from graph space (where up is greater)
301         * to image space (where down is greater).
302         *
303         * @param ya
304         * @param height
305         * @param scale
306         * @return - translated y coordinates from graph space (where up is greater) to image space
307         */
308        private int transmute(int ya, int height, double scale) {
309                return (height - 45 - (int) (ya * scale));
310        }
311
312        //calculates the necessary scaling to allow the trace to fit vertically
313        //in the space specified.
314
315        /**
316         * Returns the scaling factor necessary to allow all of the traces to fit vertically
317         * into the specified space.
318         *
319         * @param height - required height in pixels
320         * @return - scaling factor
321         */
322        private double calculateScale(int height) {
323                double newScale = 0.0;
324                double max = (double) getMaximum();
325                double ht = (double) height;
326                newScale = ((ht - 50.0)) / max;
327                return newScale;
328        }
329
330        /**
331         * Get the maximum height of any of the traces. The data is persisted for performance
332         * in the event of multiple calls, but it initialized lazily.
333         *
334         * @return - maximum height of any of the traces
335         */
336        private int getMaximum() {
337                int max = 0;
338                for (int x = 0; x <= T.length - 1; x++) {
339                        if (T[x] > max) max = T[x];
340                        if (A[x] > max) max = A[x];
341                        if (C[x] > max) max = C[x];
342                        if (G[x] > max) max = G[x];
343                }
344                return max;
345        }
346
347        /**
348         * Initialize all of the data fields for this object.
349         *
350         * @param fileData - data for object
351         * @throws IllegalArgumentException which will propagate to all of the constructors.
352         */
353        private void initData(byte[] fileData) {
354                traceData = fileData;
355                if (isABI()) {
356                        setIndex();
357                        setBasecalls();
358                        setQcalls();
359                        setSeq();
360                        setTraces();
361                } else throw new IllegalArgumentException("Not a valid ABI file.");
362        }
363
364        /**
365         * Shuffle the pointers to point to the proper spots in the trace, then load the
366         * traces into their arrays.
367         */
368        private void setTraces() {
369                int pointers[] = new int[4]; //alphabetical, 0=A, 1=C, 2=G, 3=T
370                int datas[] = new int[4];
371                char order[] = new char[4];
372
373                datas[0] = DATA9;
374                datas[1] = DATA10;
375                datas[2] = DATA11;
376                datas[3] = DATA12;
377
378                for (int i = 0; i <= 3; i++) {
379                        order[i] = (char) traceData[FWO + i];
380                }
381
382                for (int i = 0; i <= 3; i++) {
383                        switch (order[i]) {
384                                case 'A':
385                                case 'a':
386                                        pointers[0] = datas[i];
387                                        break;
388                                case 'C':
389                                case 'c':
390                                        pointers[1] = datas[i];
391                                        break;
392                                case 'G':
393                                case 'g':
394                                        pointers[2] = datas[i];
395                                        break;
396                                case 'T':
397                                case 't':
398                                        pointers[3] = datas[i];
399                                        break;
400                                default:
401                                        throw new IllegalArgumentException("Trace contains illegal values.");
402                        }
403                }
404
405                A = new int[traceLength];
406                C = new int[traceLength];
407                G = new int[traceLength];
408                T = new int[traceLength];
409
410                for (int i = 0; i <= 3; i++) {
411                        byte[] qq = new byte[traceLength * 2];
412                        getSubArray(qq, pointers[i]);
413                        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
414                        for (int x = 0; x <= traceLength - 1; x++) {
415                                try {
416                                        if (i == 0) A[x] = (int) dis.readShort();
417                                        if (i == 1) C[x] = (int) dis.readShort();
418                                        if (i == 2) G[x] = (int) dis.readShort();
419                                        if (i == 3) T[x] = (int) dis.readShort();
420                                } catch (IOException e)//This shouldn't happen. If it does something must be seriously wrong.
421                                {
422                                        throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
423                                }
424                        }
425                }
426                return;
427        }
428
429        /**
430         * Fetch the sequence from the trace data.
431         */
432        private void setSeq() {
433                char tempseq[] = new char[seqLength];
434                for (int x = 0; x <= seqLength - 1; ++x) {
435                        tempseq[x] = (char) traceData[PBAS2 + x];
436                }
437                sequence = new String(tempseq);
438        }
439
440        /**
441         * Fetch the quality calls from the trace data.
442         */
443        private void setQcalls() {
444                qCalls = new int[seqLength];
445                byte[] qq = new byte[seqLength];
446                getSubArray(qq, PCON);
447                DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
448                for (int i = 0; i <= seqLength - 1; ++i) {
449                        try {
450                                qCalls[i] = (int) dis.readByte();
451                        } catch (IOException e)//This shouldn't happen. If it does something must be seriously wrong.
452                        {
453                                throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
454                        }
455                }
456        }
457
458        /**
459         * Fetch the basecalls from the trace data.
460         */
461        private void setBasecalls() {
462                baseCalls = new int[seqLength];
463                byte[] qq = new byte[seqLength * 2];
464                getSubArray(qq, PLOC);
465                DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
466                for (int i = 0; i <= seqLength - 1; ++i) {
467                        try {
468                                baseCalls[i] = (int) dis.readShort();
469                        } catch (IOException e)//This shouldn't happen. If it does something must be seriously wrong.
470                        {
471                                throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
472                        }
473                }
474        }
475
476        /**
477         * Sets up all of the initial pointers to the important records in TraceData.
478         */
479        private void setIndex() {
480                int DataCounter, PBASCounter, PLOCCounter, PCONCounter, NumRecords, indexBase;
481                byte[] RecNameArray = new byte[4];
482                String RecName;
483
484                DataCounter = 0;
485                PBASCounter = 0;
486                PLOCCounter = 0;
487                PCONCounter = 0;
488
489                indexBase = getIntAt(absIndexBase + macJunk);
490                NumRecords = getIntAt(absIndexBase - 8 + macJunk);
491
492                for (int record = 0; record <= NumRecords - 1; record++) {
493                        getSubArray(RecNameArray, (indexBase + (record * 28)));
494                        RecName = new String(RecNameArray);
495                        if (RecName.equals("FWO_"))
496                                FWO = indexBase + (record * 28) + 20;
497                        if (RecName.equals("DATA")) {
498                                ++DataCounter;
499                                if (DataCounter == 9)
500                                        DATA9 = indexBase + (record * 28) + 20;
501                                if (DataCounter == 10)
502                                        DATA10 = indexBase + (record * 28) + 20;
503                                if (DataCounter == 11)
504                                        DATA11 = indexBase + (record * 28) + 20;
505                                if (DataCounter == 12)
506                                        DATA12 = indexBase + (record * 28) + 20;
507                        }
508                        if (RecName.equals("PBAS")) {
509                                ++PBASCounter;
510                                if (PBASCounter == 2)
511                                        PBAS2 = indexBase + (record * 28) + 20;
512                        }
513                        if (RecName.equals("PLOC")) {
514                                ++PLOCCounter;
515                                if (PLOCCounter == 2)
516                                        PLOC = indexBase + (record * 28) + 20;
517                        }
518                        if (RecName.equals("PCON")) {
519                                ++PCONCounter;
520                                if (PCONCounter == 2)
521                                        PCON = indexBase + (record * 28) + 20;
522                        }
523
524                } //next record
525                traceLength = getIntAt(DATA12 - 8);
526                seqLength = getIntAt(PBAS2 - 4);
527                PLOC = getIntAt(PLOC) + macJunk;
528                DATA9 = getIntAt(DATA9) + macJunk;
529                DATA10 = getIntAt(DATA10) + macJunk;
530                DATA11 = getIntAt(DATA11) + macJunk;
531                DATA12 = getIntAt(DATA12) + macJunk;
532                PBAS2 = getIntAt(PBAS2) + macJunk;
533                PCON = getIntAt(PCON) + macJunk;
534        }
535
536        /**
537         * Utility method to return an int beginning at <code>pointer</code> in the TraceData array.
538         *
539         * @param pointer - beginning of trace array
540         * @return - int beginning at pointer in trace array
541         */
542        private int getIntAt(int pointer) {
543                int out = 0;
544                byte[] temp = new byte[4];
545                getSubArray(temp, pointer);
546                try {
547                        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(temp));
548                        out = dis.readInt();
549                } catch (IOException e) //This shouldn't happen. If it does something must be seriously wrong.
550                {
551                        throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
552                }
553                return out;
554        }
555
556        /**
557         * A utility method which fills array b with data from the trace starting at traceDataOffset.
558         *
559         * @param b - trace byte array
560         * @param traceDataOffset - starting point
561         */
562        private void getSubArray(byte[] b, int traceDataOffset) {
563                for (int x = 0; x <= b.length - 1; x++) {
564                        b[x] = traceData[traceDataOffset + x];
565                }
566        }
567
568        /**
569         * Test to see if the file is ABI format by checking to see that the first three bytes
570         * are "ABI". Also handle the special case where 128 bytes were prepended to the file
571         * due to binary FTP from an older macintosh system.
572         *
573         * @return - if format of ABI file is correct
574         */
575        private boolean isABI() {
576                char ABI[] = new char[4];
577
578                for (int i = 0; i <= 2; i++) {
579                        ABI[i] = (char) traceData[i];
580                }
581                if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I')) {
582                        return true;
583                } else {
584                        for (int i = 128; i <= 130; i++) {
585                                ABI[i-128] = (char) traceData[i];
586                        }
587                        if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I')) {
588                                macJunk = 128;
589                                return true;
590                        } else
591                                return false;
592                }
593        }
594}