001 
002/*
003 *                    BioJava development code
004 *
005 * This code may be freely distributed and modified under the
006 * terms of the GNU Lesser General Public Licence.  This should
007 * be distributed with the code.  If you do not have a copy,
008 * see:
009 *
010 *      http://www.gnu.org/copyleft/lesser.html
011 *
012 * Copyright for this code is held jointly by the individual
013 * authors.  These should be listed in @author doc comments.
014 *
015 * For more information on the BioJava project and its aims,
016 * or to join the biojava-l mailing list, visit the home page
017 * at:
018 *
019 *      http://www.biojava.org/
020 *
021 */
022 
023package org.biojava.bio.program.abi;
024
025import java.awt.Color;
026import java.awt.Graphics2D;
027import java.awt.image.BufferedImage;
028import java.io.BufferedInputStream;
029import java.io.ByteArrayInputStream;
030import java.io.ByteArrayOutputStream;
031import java.io.DataInputStream;
032import java.io.File;
033import java.io.FileInputStream;
034import java.io.IOException;
035import java.io.InputStream;
036import java.net.URL;
037
038import org.biojava.bio.BioError;
039import org.biojava.bio.seq.DNATools;
040import org.biojava.bio.symbol.AtomicSymbol;
041import org.biojava.bio.symbol.IllegalSymbolException;
042import org.biojava.bio.symbol.SymbolList;
043
044
045/**
046 * Title: ABITrace<br><br>
047 * ABITrace is a class for managing ABI file information,
048 *  it is capable of opening an ABI file and storing
049 *  the most important fields, which can be recalled as simple java types. It can also return
050 *  an image corresponding to the trace.
051 *  It has three constructors with input types <code>File, URL, and byte[]</code>.<br><br>
052 *  ABI files contain two sets of basecall and sequence data, one that was originally
053 *  created programatically and the other, which is an editable copy. This version of this object
054 *  only references the original unedited data.<br>
055 *
056 * Copyright (c) 2001
057 * @author David H. Klatte, Ph.D.
058 * @author Matthew Pocock
059 * @version 0.5alpha
060 */
061public class ABITrace
062{
063
064  //the next three lines are the important persistent data
065  private String sequence;
066  private int A[], G[], C[], T[], Basecalls[], Qcalls[];
067  private int TraceLength, SeqLength;
068
069  //This is the actual file data.
070  private byte[] TraceData;
071
072  private int maximum = 0;
073
074  //the next four declaration lines comprise the file index information
075  private int MacJunk=0; //sometimes when macintosh files are
076                         //FTPed in binary form, they have 128 bytes
077                         //of crap pre-pended to them. This constant
078                         //allows ABITrace to handle that in a way that
079                         //is invisible to the user.
080  private static int AbsIndexBase=26; //The file location of the Index pointer
081  private int IndexBase, PLOC, PCON;
082
083  //the next declaration is for the actual file pointers
084  private  int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO;
085
086/**
087 * The File constructor opens a local ABI file and parses the content.
088 * @param ABIFile is a <code>java.io.File</code> on the local file system.
089 * @throws IOException if there is a problem reading the file.
090 * @throws IllegalArgumentException if the file is not a valid ABI file.
091 */
092  public ABITrace( File ABIFile ) throws IOException
093  {
094    byte[] bytes = null;
095    ByteArrayOutputStream baos = new ByteArrayOutputStream();
096    FileInputStream fis = new FileInputStream(ABIFile);
097    BufferedInputStream bis = new BufferedInputStream(fis);
098    int b;
099    while ((b = bis.read()) >= 0)
100    {
101      baos.write(b);
102    }
103    bis.close(); fis.close(); baos.close();
104    bytes = baos.toByteArray();
105    initData(bytes);
106  }
107
108/**
109 * The URL constructor opens an ABI file from any URL.
110 * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file.
111 * @throws IOException if there is a problem reading from the URL.
112 * @throws IllegalArgumentException if the URL does not contain a valid ABI file.
113 */
114  public ABITrace( URL ABIFile ) throws IOException
115  {
116    byte[] bytes = null;
117    ByteArrayOutputStream baos = new ByteArrayOutputStream();
118    InputStream is = ABIFile.openStream();
119    BufferedInputStream bis = new BufferedInputStream(is);
120    int b;
121    while ((b = bis.read()) >= 0)
122    {
123      baos.write(b);
124    }
125    bis.close(); is.close(); baos.close();
126    bytes = baos.toByteArray();
127    initData(bytes);
128  }
129
130/**
131 * The <code>byte[]</code> constructor parses an ABI file represented as a byte array.
132 * @throws IllegalArgumentException if the data does not represent a valid ABI file.
133 */
134  public ABITrace(byte[] ABIFileData)
135  {
136    initData(ABIFileData);
137  }
138
139/**
140 * Returns the length of the sequence (number of bases) in this trace.
141 */
142  public int getSequenceLength() { return SeqLength; }
143
144/**
145 * Returns the length of the trace (number of x-coordinate points in the graph).
146 */
147  public int getTraceLength() { return TraceLength; }
148
149/**
150 * Returns an <code>int[]</code> array that represents the basecalls - each int in the
151 * array corresponds to an x-coordinate point in the graph that is a peak (a base location).
152 */
153  public int[] getBasecalls() { return Basecalls; }
154  
155  /**
156 * Returns an <code>int[]</code> array that represents the quality - each int in the
157 * array corresponds to an quality value 90-255) in the graph at a base location).
158 */
159  public int[] getQcalls() { return Qcalls; }
160
161/**
162 * Returns the original programatically determined (unedited) sequence as a <code>SymbolList</code>.
163 */
164  public SymbolList getSequence() throws BioError
165  { 
166    try {
167      return DNATools.createDNA(sequence); 
168    }
169    catch (IllegalSymbolException ise) {
170      // this should be impossible!
171      throw new BioError(ise);
172    }
173  }
174
175/**
176 * Returns one of the four traces - all of the y-coordinate values,
177 * each of which correspond to a single x-coordinate relative to the
178 * position in the array, so that if element 4 in the array is 972, then
179 * x is 4 and y is 972 for that point.
180 *
181 * @param base  the DNA AttomicSymbol to retrieve the trace values for
182 * @return an array of ints giving the entire trace for that base
183 * @throws IllegalSymbolException if the base is not valid
184 */
185  public int[] getTrace (AtomicSymbol base) throws IllegalSymbolException
186  {
187    if (base == DNATools.a()) {
188      return A;
189    } else if (base == DNATools.c()) {
190      return C;
191    } else if (base == DNATools.g()) {
192      return G;
193    } else if (base == DNATools.t()) {
194      return T;
195    } else {
196      DNATools.getDNA().validate(base);
197      throw new IllegalSymbolException("Don't know symbol: " + base);
198    }
199  }
200
201/**
202 * Returns a BufferedImage that represents the entire trace. The height can be set precisely in
203 * pixels, the width in pixels is determined by the scaling factor times the number
204 * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented
205 * in the returned image.
206 *
207 * @param imageHeight is the desired height of the image in pixels.
208 * @param widthScale indiates how many horizontal pixels to use to represent a single x-coordinate (try 2).
209 */
210  public BufferedImage getImage(int imageHeight, int widthScale)
211  {
212    BufferedImage out = new BufferedImage(TraceLength * widthScale, imageHeight, BufferedImage.TYPE_BYTE_INDEXED);
213    Graphics2D g = out.createGraphics();
214    Color acolor = Color.green.darker();
215    Color ccolor = Color.blue;
216    Color gcolor = Color.black;
217    Color tcolor = Color.red;
218    Color ncolor = Color.pink;
219    double scale = calculateScale(imageHeight);
220    int[] bc = Basecalls;
221    char[] seq = sequence.toCharArray();
222    g.setBackground(Color.white);
223    g.clearRect(0, 0, TraceLength * widthScale, imageHeight);
224    int here = 0;
225    int basenum = 0;
226    for (int q = 1; q <= 5; q++)
227    {
228      for (int x = 0; x <= TraceLength - 2; x++)
229      {
230        if (q==1)
231        {
232          g.setColor(acolor);
233          g.drawLine(2*x, transmute(A[x], imageHeight, scale),
234                     2*(x + 1), transmute(A[x+1], imageHeight, scale));
235        }
236        if (q==2)
237        {
238          g.setColor(ccolor);
239          g.drawLine(2*x, transmute(C[x], imageHeight, scale),
240                     2*(x + 1), transmute(C[x+1], imageHeight, scale));
241        }
242        if (q==3)
243        {
244          g.setColor(tcolor);
245          g.drawLine(2*x, transmute(T[x], imageHeight, scale),
246                     2*(x + 1), transmute(T[x+1], imageHeight, scale));
247        }
248        if (q==4)
249        {
250          g.setColor(gcolor);
251          g.drawLine(2*x, transmute(G[x], imageHeight, scale),
252                     2*(x + 1), transmute(G[x+1], imageHeight, scale));
253        }
254        if (q==5)
255        {
256          if ((here > bc.length-1) || (basenum > seq.length-1)) break;
257          if (bc[here] == x)
258          {
259            g.drawLine(2*x, transmute(-2, imageHeight, 1.0),
260                       2*x, transmute(-7, imageHeight, 1.0));
261            if ((basenum+1)%10 == 0) //if the basecount is divisible by ten
262                            //add a number
263            {
264              g.drawLine(2*x, transmute(-20, imageHeight, 1.0),
265                         2*x, transmute(-25, imageHeight, 1.0));
266              g.drawString(Integer.toString(basenum+1),
267                           2*x-3, transmute(-36, imageHeight, 1.0));
268            }
269            switch (seq[basenum])
270            {
271              case 'A': case 'a': g.setColor(acolor); break;
272              case 'C': case 'c': g.setColor(ccolor); break;
273              case 'G': case 'g': g.setColor(gcolor); break;
274              case 'T': case 't': g.setColor(tcolor); break;
275              default: g.setColor(ncolor);
276            }
277            g.drawChars(seq, basenum, 1,
278                    2*x-3, transmute(-18, imageHeight, 1.0));
279            g.setColor(Color.black);
280            here++; basenum++;
281          }
282        }
283      }
284    }
285    return out;
286  }
287
288/**
289 * Initialize all of the data fields for this object.
290 * @throws IllegalArgumentException which will propagate to all of the constructors.
291 */
292  private void initData(byte[] fileData)
293  {
294    TraceData = fileData;
295    if (isABI())
296    {
297      setIndex();
298      setBasecalls();
299      setQcalls();
300      setSeq();
301      setTraces();
302    }
303    else throw new IllegalArgumentException("Not a valid ABI file.");
304  }
305
306/**
307 * A utility method which fills array b with data from the trace starting at traceDataOffset.
308 */
309  private void getSubArray(byte[] b, int traceDataOffset)
310  {
311    for (int x=0; x<=b.length-1; x++)
312    {
313      b[x] = TraceData[traceDataOffset + x];
314    }
315  }
316
317/**
318 * Shuffle the pointers to point to the proper spots in the trace, then load the
319 * traces into their arrays.
320 */
321  private void setTraces()
322  {
323    int pointers[] = new int[4]; //alphabetical, 0=A, 1=C, 2=G, 3=T
324    int datas[] = new int[4];
325    char order[] = new char[4];
326
327    datas[0] = DATA9;
328    datas[1] = DATA10;
329    datas[2] = DATA11;
330    datas[3] = DATA12;
331
332    for (int i=0; i<=3; i++)
333    {
334      order[i]=(char) TraceData[FWO+i];
335    }
336
337    for (int i=0; i <=3; i++)
338    {
339      switch (order[i])
340      {
341        case 'A': case 'a':
342          pointers[0] = datas[i];
343          break;
344        case 'C': case 'c':
345          pointers[1] = datas[i];
346          break;
347        case 'G': case 'g':
348          pointers[2] = datas[i];
349          break;
350        case 'T': case 't':
351          pointers[3] = datas[i];
352          break;
353        default:
354          throw new IllegalArgumentException("Trace contains illegal values.");
355      }
356    }
357
358    A = new int[TraceLength];
359    C = new int[TraceLength];
360    G = new int[TraceLength];
361    T = new int[TraceLength];
362
363    for (int i=0; i <=3; i++)
364    {
365      byte[] qq = new byte[TraceLength*2];
366      getSubArray(qq, pointers[i]);
367      DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
368      for (int x=0; x <=TraceLength - 1; x++)
369      {
370        try
371        {
372          if (i == 0) A[x] = (int) dis.readShort();
373          if (i == 1) C[x] = (int) dis.readShort();
374          if (i == 2) G[x] = (int) dis.readShort();
375          if (i == 3) T[x] = (int) dis.readShort();
376        }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong.
377        {
378          throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
379        }
380      }
381    }
382    return;
383  }
384
385/**
386 * Fetch the sequence from the trace data.
387 */
388  private void setSeq()
389  {
390    char tempseq[] = new char[SeqLength];
391    for (int x = 0; x <= SeqLength - 1; ++x)
392    {
393      tempseq[x] = (char) TraceData[PBAS2 + x];
394    }
395    sequence = new String (tempseq);
396  }
397
398
399/**
400 * Fetch the basecalls from the trace data.
401 */
402  private void setBasecalls()
403  {
404    Basecalls = new int[SeqLength];
405    byte[] qq = new byte[SeqLength*2];
406    getSubArray(qq, PLOC);
407    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
408    for (int i = 0; i <= SeqLength -1; ++i)
409    {
410      try
411      {
412        Basecalls[i]=(int) dis.readShort();
413      }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong.
414      {
415        throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
416      }
417    }
418  }
419  
420    /**
421     * Fetch the quality calls from the trace data.
422     */
423    private void setQcalls() {
424        Qcalls = new int[SeqLength];
425        byte[] qq = new byte[SeqLength];
426        getSubArray(qq, PCON);
427        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
428        for (int i = 0; i <= SeqLength - 1; ++i) {
429            try {
430                Qcalls[i] = (int) dis.readByte();
431            } catch (IOException e)//This shouldn't happen. If it does something must be seriously wrong.
432            {
433                throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
434            }
435        }
436    }
437
438/**
439 * Utility method to return an int beginning at <code>pointer</code> in the TraceData array.
440 */
441  private int getIntAt(int pointer)
442  {
443    int out = 0;
444    byte[] temp = new byte[4];
445    getSubArray(temp, pointer);
446    try
447    {
448      DataInputStream dis = new DataInputStream(new ByteArrayInputStream(temp));
449      out = dis.readInt();
450    }catch(IOException e) //This shouldn't happen. If it does something must be seriously wrong.
451    {
452      throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
453    }
454    return out;
455  }
456
457/**
458 * Utility method to translate y coordinates from graph space (where up is greater)
459 * to image space (where down is greater).
460 */
461  private int transmute(int ya, int height, double scale)
462  {
463    return (height - 45 - (int) (ya * scale));
464  }
465
466/**
467 * Get the maximum height of any of the traces. The data is persisted for performance
468 * in the event of multiple calls, but it initialized lazily.
469 */
470  private int getMaximum()
471  {
472    if (maximum > 0) return maximum;
473    int max = 0;
474    for (int x=0; x<=T.length-1; x++)
475    {
476      if (T[x] > max) max = T[x];
477      if (A[x] > max) max = A[x];
478      if (C[x] > max) max = C[x];
479      if (G[x] > max) max = G[x];
480    }
481    return max;
482  }
483
484  //calculates the necessary scaling to allow the trace to fit vertically
485  //in the space specified.
486/**
487 * Returns the scaling factor necessary to allow all of the traces to fit vertically
488 * into the specified space.
489 * @param <code>height</code> - the required height in pixels.
490 */
491  private double calculateScale(int height)
492  {
493    double newScale = 0.0;
494    double max = (double)getMaximum();
495    double ht = (double)height;
496    newScale = ((ht - 50.0))/max;
497    return newScale;
498  }
499
500/**
501 * Sets up all of the initial pointers to the important records in TraceData.
502 */
503  private void setIndex()
504  {
505    int DataCounter, PBASCounter, PLOCCounter, PCONCounter, NumRecords;
506    byte[] RecNameArray = new byte[4];
507    String RecName;
508
509    DataCounter = 0; PBASCounter = 0; PLOCCounter = 0; PCONCounter = 0;
510
511    IndexBase = getIntAt(AbsIndexBase + MacJunk);
512    NumRecords = getIntAt(AbsIndexBase - 8 + MacJunk);
513
514    for (int record = 0; record <= NumRecords - 1; record++)
515    {
516      getSubArray(RecNameArray, (IndexBase + (record * 28)));
517      RecName = new String (RecNameArray);
518      if (RecName.equals("FWO_"))
519        FWO = IndexBase + (record * 28) + 20;
520      if (RecName.equals("DATA"))
521      {
522        ++DataCounter;
523        if (DataCounter == 9)
524          DATA9 = IndexBase + (record * 28) + 20;
525        if (DataCounter == 10)
526          DATA10 = IndexBase + (record * 28) + 20;
527        if (DataCounter == 11)
528          DATA11 = IndexBase + (record * 28) + 20;
529        if (DataCounter == 12)
530          DATA12 = IndexBase + (record * 28) + 20;
531      }
532      if (RecName.equals("PBAS"))
533      {
534        ++PBASCounter;
535        if (PBASCounter == 2)
536          PBAS2 = IndexBase + (record * 28) + 20;
537      }
538      if (RecName.equals("PLOC"))
539      {
540        ++PLOCCounter;
541        if (PLOCCounter == 2)
542          PLOC = IndexBase + (record * 28) + 20;
543      }
544      if (RecName.equals("PCON"))
545      {
546        ++PCONCounter;
547        if (PCONCounter == 2)
548          PCON = IndexBase + (record * 28) + 20;
549      }
550
551    } //next record
552    TraceLength = getIntAt(DATA12 - 8);
553    SeqLength = getIntAt(PBAS2-4);
554    PLOC = getIntAt(PLOC) + MacJunk;
555    DATA9 = getIntAt(DATA9) + MacJunk;
556    DATA10 = getIntAt(DATA10) + MacJunk;
557    DATA11 = getIntAt(DATA11) + MacJunk;
558    DATA12 = getIntAt(DATA12) + MacJunk;
559    PBAS2 = getIntAt(PBAS2) + MacJunk;
560    PCON = getIntAt(PCON) + MacJunk;
561  }
562
563/**
564 * Test to see if the file is ABI format by checking to see that the first three bytes
565 * are "ABI". Also handle the special case where 128 bytes were prepended to the file
566 * due to binary FTP from an older macintosh system.
567 */
568  private boolean isABI()
569  {
570    char ABI[] = new char[4];
571
572    for (int i=0; i<=2; i++)
573    {
574      ABI[i]=(char) TraceData[i];
575    }
576    if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I'))
577    {
578      return true;
579    }
580    else
581    {
582      for (int i=128; i<=130; i++)
583      {
584        ABI[i]=(char) TraceData[i];
585      }
586      if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I'))
587      {
588        MacJunk=128;
589        return true;
590      }
591      else
592        return false;
593    }
594  }
595}
596