001 
002/*
003 *                    BioJava development code
004 *
005 * This code may be freely distributed and modified under the
006 * terms of the GNU Lesser General Public Licence.  This should
007 * be distributed with the code.  If you do not have a copy,
008 * see:
009 *
010 *      http://www.gnu.org/copyleft/lesser.html
011 *
012 * Copyright for this code is held jointly by the individual
013 * authors.  These should be listed in @author doc comments.
014 *
015 * For more information on the BioJava project and its aims,
016 * or to join the biojava-l mailing list, visit the home page
017 * at:
018 *
019 *      http://www.biojava.org/
020 *
021 */
022 
023package org.biojava.bio.program.abi;
024
025import java.awt.Color;
026import java.awt.Graphics2D;
027import java.awt.image.BufferedImage;
028import java.io.BufferedInputStream;
029import java.io.ByteArrayInputStream;
030import java.io.ByteArrayOutputStream;
031import java.io.DataInputStream;
032import java.io.File;
033import java.io.FileInputStream;
034import java.io.IOException;
035import java.io.InputStream;
036import java.net.URL;
037
038import org.biojava.bio.BioError;
039import org.biojava.bio.seq.DNATools;
040import org.biojava.bio.symbol.AtomicSymbol;
041import org.biojava.bio.symbol.IllegalSymbolException;
042import org.biojava.bio.symbol.SymbolList;
043
044
045/**
046 * Title: ABITrace<br><br>
047 * ABITrace is a class for managing ABI file information,
048 *  it is capable of opening an ABI file and storing
049 *  the most important fields, which can be recalled as simple java types. It can also return
050 *  an image corresponding to the trace.
051 *  It has three constructors with input types <code>File, URL, and byte[]</code>.<br><br>
052 *  ABI files contain two sets of basecall and sequence data, one that was originally
053 *  created programatically and the other, which is an editable copy. This version of this object
054 *  only references the original unedited data.<br>
055 *
056 * Copyright (c) 2001
057 * @author David H. Klatte, Ph.D.
058 * @author Matthew Pocock
059 * @version 0.5alpha
060 */
061public class ABITrace
062{
063
064  //the next three lines are the important persistent data
065  private String sequence;
066  private int A[], G[], C[], T[], Basecalls[];
067  private int TraceLength, SeqLength;
068
069  //This is the actual file data.
070  private byte[] TraceData;
071
072  private int maximum = 0;
073
074  //the next four declaration lines comprise the file index information
075  private int MacJunk=0; //sometimes when macintosh files are
076                         //FTPed in binary form, they have 128 bytes
077                         //of crap pre-pended to them. This constant
078                         //allows ABITrace to handle that in a way that
079                         //is invisible to the user.
080  private static int AbsIndexBase=26; //The file location of the Index pointer
081  private int IndexBase,  PLOC;
082
083  //the next declaration is for the actual file pointers
084  private  int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO;
085
086/**
087 * The File constructor opens a local ABI file and parses the content.
088 * @param ABIFile is a <code>java.io.File</code> on the local file system.
089 * @throws IOException if there is a problem reading the file.
090 * @throws IllegalArgumentException if the file is not a valid ABI file.
091 */
092  public ABITrace( File ABIFile ) throws IOException
093  {
094    byte[] bytes = null;
095    ByteArrayOutputStream baos = new ByteArrayOutputStream();
096    FileInputStream fis = new FileInputStream(ABIFile);
097    BufferedInputStream bis = new BufferedInputStream(fis);
098    int b;
099    while ((b = bis.read()) >= 0)
100    {
101      baos.write(b);
102    }
103    bis.close(); fis.close(); baos.close();
104    bytes = baos.toByteArray();
105    initData(bytes);
106  }
107
108/**
109 * The URL constructor opens an ABI file from any URL.
110 * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file.
111 * @throws IOException if there is a problem reading from the URL.
112 * @throws IllegalArgumentException if the URL does not contain a valid ABI file.
113 */
114  public ABITrace( URL ABIFile ) throws IOException
115  {
116    byte[] bytes = null;
117    ByteArrayOutputStream baos = new ByteArrayOutputStream();
118    InputStream is = ABIFile.openStream();
119    BufferedInputStream bis = new BufferedInputStream(is);
120    int b;
121    while ((b = bis.read()) >= 0)
122    {
123      baos.write(b);
124    }
125    bis.close(); is.close(); baos.close();
126    bytes = baos.toByteArray();
127    initData(bytes);
128  }
129
130/**
131 * The <code>byte[]</code> constructor parses an ABI file represented as a byte array.
132 * @throws IllegalArgumentException if the data does not represent a valid ABI file.
133 */
134  public ABITrace(byte[] ABIFileData)
135  {
136    initData(ABIFileData);
137  }
138
139/**
140 * Returns the length of the sequence (number of bases) in this trace.
141 */
142  public int getSequenceLength() { return SeqLength; }
143
144/**
145 * Returns the length of the trace (number of x-coordinate points in the graph).
146 */
147  public int getTraceLength() { return TraceLength; }
148
149/**
150 * Returns an <code>int[]</code> array that represents the basecalls - each int in the
151 * array corresponds to an x-coordinate point in the graph that is a peak (a base location).
152 */
153  public int[] getBasecalls() { return Basecalls; }
154
155/**
156 * Returns the original programatically determined (unedited) sequence as a <code>SymbolList</code>.
157 */
158  public SymbolList getSequence() throws BioError
159  { 
160    try {
161      return DNATools.createDNA(sequence); 
162    }
163    catch (IllegalSymbolException ise) {
164      // this should be impossible!
165      throw new BioError(ise);
166    }
167  }
168
169/**
170 * Returns one of the four traces - all of the y-coordinate values,
171 * each of which correspond to a single x-coordinate relative to the
172 * position in the array, so that if element 4 in the array is 972, then
173 * x is 4 and y is 972 for that point.
174 *
175 * @param base  the DNA AttomicSymbol to retrieve the trace values for
176 * @return an array of ints giving the entire trace for that base
177 * @throws IllegalSymbolException if the base is not valid
178 */
179  public int[] getTrace (AtomicSymbol base) throws IllegalSymbolException
180  {
181    if (base == DNATools.a()) {
182      return A;
183    } else if (base == DNATools.c()) {
184      return C;
185    } else if (base == DNATools.g()) {
186      return G;
187    } else if (base == DNATools.t()) {
188      return T;
189    } else {
190      DNATools.getDNA().validate(base);
191      throw new IllegalSymbolException("Don't know symbol: " + base);
192    }
193  }
194
195/**
196 * Returns a BufferedImage that represents the entire trace. The height can be set precisely in
197 * pixels, the width in pixels is determined by the scaling factor times the number
198 * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented
199 * in the returned image.
200 *
201 * @param imageHeight is the desired height of the image in pixels.
202 * @param widthScale indiates how many horizontal pixels to use to represent a single x-coordinate (try 2).
203 */
204  public BufferedImage getImage(int imageHeight, int widthScale)
205  {
206    BufferedImage out = new BufferedImage(TraceLength * widthScale, imageHeight, BufferedImage.TYPE_BYTE_INDEXED);
207    Graphics2D g = out.createGraphics();
208    Color acolor = Color.green.darker();
209    Color ccolor = Color.blue;
210    Color gcolor = Color.black;
211    Color tcolor = Color.red;
212    Color ncolor = Color.pink;
213    double scale = calculateScale(imageHeight);
214    int[] bc = Basecalls;
215    char[] seq = sequence.toCharArray();
216    g.setBackground(Color.white);
217    g.clearRect(0, 0, TraceLength * widthScale, imageHeight);
218    int here = 0;
219    int basenum = 0;
220    for (int q = 1; q <= 5; q++)
221    {
222      for (int x = 0; x <= TraceLength - 2; x++)
223      {
224        if (q==1)
225        {
226          g.setColor(acolor);
227          g.drawLine(2*x, transmute(A[x], imageHeight, scale),
228                     2*(x + 1), transmute(A[x+1], imageHeight, scale));
229        }
230        if (q==2)
231        {
232          g.setColor(ccolor);
233          g.drawLine(2*x, transmute(C[x], imageHeight, scale),
234                     2*(x + 1), transmute(C[x+1], imageHeight, scale));
235        }
236        if (q==3)
237        {
238          g.setColor(tcolor);
239          g.drawLine(2*x, transmute(T[x], imageHeight, scale),
240                     2*(x + 1), transmute(T[x+1], imageHeight, scale));
241        }
242        if (q==4)
243        {
244          g.setColor(gcolor);
245          g.drawLine(2*x, transmute(G[x], imageHeight, scale),
246                     2*(x + 1), transmute(G[x+1], imageHeight, scale));
247        }
248        if (q==5)
249        {
250          if ((here > bc.length-1) || (basenum > seq.length-1)) break;
251          if (bc[here] == x)
252          {
253            g.drawLine(2*x, transmute(-2, imageHeight, 1.0),
254                       2*x, transmute(-7, imageHeight, 1.0));
255            if ((basenum+1)%10 == 0) //if the basecount is divisible by ten
256                            //add a number
257            {
258              g.drawLine(2*x, transmute(-20, imageHeight, 1.0),
259                         2*x, transmute(-25, imageHeight, 1.0));
260              g.drawString(Integer.toString(basenum+1),
261                           2*x-3, transmute(-36, imageHeight, 1.0));
262            }
263            switch (seq[basenum])
264            {
265              case 'A': case 'a': g.setColor(acolor); break;
266              case 'C': case 'c': g.setColor(ccolor); break;
267              case 'G': case 'g': g.setColor(gcolor); break;
268              case 'T': case 't': g.setColor(tcolor); break;
269              default: g.setColor(ncolor);
270            }
271            g.drawChars(seq, basenum, 1,
272                    2*x-3, transmute(-18, imageHeight, 1.0));
273            g.setColor(Color.black);
274            here++; basenum++;
275          }
276        }
277      }
278    }
279    return out;
280  }
281
282/**
283 * Initialize all of the data fields for this object.
284 * @throws IllegalArgumentException which will propagate to all of the constructors.
285 */
286  private void initData(byte[] fileData)
287  {
288    TraceData = fileData;
289    if (isABI())
290    {
291      setIndex();
292      setBasecalls();
293      setSeq();
294      setTraces();
295    }
296    else throw new IllegalArgumentException("Not a valid ABI file.");
297  }
298
299/**
300 * A utility method which fills array b with data from the trace starting at traceDataOffset.
301 */
302  private void getSubArray(byte[] b, int traceDataOffset)
303  {
304    for (int x=0; x<=b.length-1; x++)
305    {
306      b[x] = TraceData[traceDataOffset + x];
307    }
308  }
309
310/**
311 * Shuffle the pointers to point to the proper spots in the trace, then load the
312 * traces into their arrays.
313 */
314  private void setTraces()
315  {
316    int pointers[] = new int[4]; //alphabetical, 0=A, 1=C, 2=G, 3=T
317    int datas[] = new int[4];
318    char order[] = new char[4];
319
320    datas[0] = DATA9;
321    datas[1] = DATA10;
322    datas[2] = DATA11;
323    datas[3] = DATA12;
324
325    for (int i=0; i<=3; i++)
326    {
327      order[i]=(char) TraceData[FWO+i];
328    }
329
330    for (int i=0; i <=3; i++)
331    {
332      switch (order[i])
333      {
334        case 'A': case 'a':
335          pointers[0] = datas[i];
336          break;
337        case 'C': case 'c':
338          pointers[1] = datas[i];
339          break;
340        case 'G': case 'g':
341          pointers[2] = datas[i];
342          break;
343        case 'T': case 't':
344          pointers[3] = datas[i];
345          break;
346        default:
347          throw new IllegalArgumentException("Trace contains illegal values.");
348      }
349    }
350
351    A = new int[TraceLength];
352    C = new int[TraceLength];
353    G = new int[TraceLength];
354    T = new int[TraceLength];
355
356    for (int i=0; i <=3; i++)
357    {
358      byte[] qq = new byte[TraceLength*2];
359      getSubArray(qq, pointers[i]);
360      DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
361      for (int x=0; x <=TraceLength - 1; x++)
362      {
363        try
364        {
365          if (i == 0) A[x] = (int) dis.readShort();
366          if (i == 1) C[x] = (int) dis.readShort();
367          if (i == 2) G[x] = (int) dis.readShort();
368          if (i == 3) T[x] = (int) dis.readShort();
369        }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong.
370        {
371          throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
372        }
373      }
374    }
375    return;
376  }
377
378/**
379 * Fetch the sequence from the trace data.
380 */
381  private void setSeq()
382  {
383    char tempseq[] = new char[SeqLength];
384    for (int x = 0; x <= SeqLength - 1; ++x)
385    {
386      tempseq[x] = (char) TraceData[PBAS2 + x];
387    }
388    sequence = new String (tempseq);
389  }
390
391
392/**
393 * Fetch the basecalls from the trace data.
394 */
395  private void setBasecalls()
396  {
397    Basecalls = new int[SeqLength];
398    byte[] qq = new byte[SeqLength*2];
399    getSubArray(qq, PLOC);
400    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq));
401    for (int i = 0; i <= SeqLength -1; ++i)
402    {
403      try
404      {
405        Basecalls[i]=(int) dis.readShort();
406      }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong.
407      {
408        throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
409      }
410    }
411  }
412
413/**
414 * Utility method to return an int beginning at <code>pointer</code> in the TraceData array.
415 */
416  private int getIntAt(int pointer)
417  {
418    int out = 0;
419    byte[] temp = new byte[4];
420    getSubArray(temp, pointer);
421    try
422    {
423      DataInputStream dis = new DataInputStream(new ByteArrayInputStream(temp));
424      out = dis.readInt();
425    }catch(IOException e) //This shouldn't happen. If it does something must be seriously wrong.
426    {
427      throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams.");
428    }
429    return out;
430  }
431
432/**
433 * Utility method to translate y coordinates from graph space (where up is greater)
434 * to image space (where down is greater).
435 */
436  private int transmute(int ya, int height, double scale)
437  {
438    return (height - 45 - (int) (ya * scale));
439  }
440
441/**
442 * Get the maximum height of any of the traces. The data is persisted for performance
443 * in the event of multiple calls, but it initialized lazily.
444 */
445  private int getMaximum()
446  {
447    if (maximum > 0) return maximum;
448    int max = 0;
449    for (int x=0; x<=T.length-1; x++)
450    {
451      if (T[x] > max) max = T[x];
452      if (A[x] > max) max = A[x];
453      if (C[x] > max) max = C[x];
454      if (G[x] > max) max = G[x];
455    }
456    return max;
457  }
458
459  //calculates the necessary scaling to allow the trace to fit vertically
460  //in the space specified.
461/**
462 * Returns the scaling factor necessary to allow all of the traces to fit vertically
463 * into the specified space.
464 * @param <code>height</code> - the required height in pixels.
465 */
466  private double calculateScale(int height)
467  {
468    double newScale = 0.0;
469    double max = (double)getMaximum();
470    double ht = (double)height;
471    newScale = ((ht - 50.0))/max;
472    return newScale;
473  }
474
475/**
476 * Sets up all of the initial pointers to the important records in TraceData.
477 */
478  private void setIndex()
479  {
480    int DataCounter, PBASCounter, PLOCCounter, NumRecords;
481    byte[] RecNameArray = new byte[4];
482    String RecName;
483
484    DataCounter = 0; PBASCounter = 0; PLOCCounter = 0;
485
486    IndexBase = getIntAt(AbsIndexBase + MacJunk);
487    NumRecords = getIntAt(AbsIndexBase - 8 + MacJunk);
488
489    for (int record = 0; record <= NumRecords - 1; record++)
490    {
491      getSubArray(RecNameArray, (IndexBase + (record * 28)));
492      RecName = new String (RecNameArray);
493      if (RecName.equals("FWO_"))
494        FWO = IndexBase + (record * 28) + 20;
495      if (RecName.equals("DATA"))
496      {
497        ++DataCounter;
498        if (DataCounter == 9)
499          DATA9 = IndexBase + (record * 28) + 20;
500        if (DataCounter == 10)
501          DATA10 = IndexBase + (record * 28) + 20;
502        if (DataCounter == 11)
503          DATA11 = IndexBase + (record * 28) + 20;
504        if (DataCounter == 12)
505          DATA12 = IndexBase + (record * 28) + 20;
506      }
507      if (RecName.equals("PBAS"))
508      {
509        ++PBASCounter;
510        if (PBASCounter == 2)
511          PBAS2 = IndexBase + (record * 28) + 20;
512      }
513      if (RecName.equals("PLOC"))
514      {
515        ++PLOCCounter;
516        if (PLOCCounter == 2)
517          PLOC = IndexBase + (record * 28) + 20;
518      }
519
520    } //next record
521    TraceLength = getIntAt(DATA12 - 8);
522    SeqLength = getIntAt(PBAS2-4);
523    PLOC = getIntAt(PLOC) + MacJunk;
524    DATA9 = getIntAt(DATA9) + MacJunk;
525    DATA10 = getIntAt(DATA10) + MacJunk;
526    DATA11 = getIntAt(DATA11) + MacJunk;
527    DATA12 = getIntAt(DATA12) + MacJunk;
528    PBAS2 = getIntAt(PBAS2) + MacJunk;
529  }
530
531/**
532 * Test to see if the file is ABI format by checking to see that the first three bytes
533 * are "ABI". Also handle the special case where 128 bytes were prepended to the file
534 * due to binary FTP from an older macintosh system.
535 */
536  private boolean isABI()
537  {
538    char ABI[] = new char[4];
539
540    for (int i=0; i<=2; i++)
541    {
542      ABI[i]=(char) TraceData[i];
543    }
544    if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I'))
545    {
546      return true;
547    }
548    else
549    {
550      for (int i=128; i<=130; i++)
551      {
552        ABI[i]=(char) TraceData[i];
553      }
554      if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I'))
555      {
556        MacJunk=128;
557        return true;
558      }
559      else
560        return false;
561    }
562  }
563}
564