001 002/* 003 * BioJava development code 004 * 005 * This code may be freely distributed and modified under the 006 * terms of the GNU Lesser General Public Licence. This should 007 * be distributed with the code. If you do not have a copy, 008 * see: 009 * 010 * http://www.gnu.org/copyleft/lesser.html 011 * 012 * Copyright for this code is held jointly by the individual 013 * authors. These should be listed in @author doc comments. 014 * 015 * For more information on the BioJava project and its aims, 016 * or to join the biojava-l mailing list, visit the home page 017 * at: 018 * 019 * http://www.biojava.org/ 020 * 021 */ 022 023package org.biojava.bio.program.abi; 024 025import java.awt.Color; 026import java.awt.Graphics2D; 027import java.awt.image.BufferedImage; 028import java.io.BufferedInputStream; 029import java.io.ByteArrayInputStream; 030import java.io.ByteArrayOutputStream; 031import java.io.DataInputStream; 032import java.io.File; 033import java.io.FileInputStream; 034import java.io.IOException; 035import java.io.InputStream; 036import java.net.URL; 037 038import org.biojava.bio.BioError; 039import org.biojava.bio.seq.DNATools; 040import org.biojava.bio.symbol.AtomicSymbol; 041import org.biojava.bio.symbol.IllegalSymbolException; 042import org.biojava.bio.symbol.SymbolList; 043 044 045/** 046 * Title: ABITrace<br><br> 047 * ABITrace is a class for managing ABI file information, 048 * it is capable of opening an ABI file and storing 049 * the most important fields, which can be recalled as simple java types. It can also return 050 * an image corresponding to the trace. 051 * It has three constructors with input types <code>File, URL, and byte[]</code>.<br><br> 052 * ABI files contain two sets of basecall and sequence data, one that was originally 053 * created programatically and the other, which is an editable copy. This version of this object 054 * only references the original unedited data.<br> 055 * 056 * Copyright (c) 2001 057 * @author David H. Klatte, Ph.D. 058 * @author Matthew Pocock 059 * @version 0.5alpha 060 */ 061public class ABITrace 062{ 063 064 //the next three lines are the important persistent data 065 private String sequence; 066 private int A[], G[], C[], T[], Basecalls[], Qcalls[]; 067 private int TraceLength, SeqLength; 068 069 //This is the actual file data. 070 private byte[] TraceData; 071 072 private int maximum = 0; 073 074 //the next four declaration lines comprise the file index information 075 private int MacJunk=0; //sometimes when macintosh files are 076 //FTPed in binary form, they have 128 bytes 077 //of crap pre-pended to them. This constant 078 //allows ABITrace to handle that in a way that 079 //is invisible to the user. 080 private static int AbsIndexBase=26; //The file location of the Index pointer 081 private int IndexBase, PLOC, PCON; 082 083 //the next declaration is for the actual file pointers 084 private int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO; 085 086/** 087 * The File constructor opens a local ABI file and parses the content. 088 * @param ABIFile is a <code>java.io.File</code> on the local file system. 089 * @throws IOException if there is a problem reading the file. 090 * @throws IllegalArgumentException if the file is not a valid ABI file. 091 */ 092 public ABITrace( File ABIFile ) throws IOException 093 { 094 byte[] bytes = null; 095 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 096 FileInputStream fis = new FileInputStream(ABIFile); 097 BufferedInputStream bis = new BufferedInputStream(fis); 098 int b; 099 while ((b = bis.read()) >= 0) 100 { 101 baos.write(b); 102 } 103 bis.close(); fis.close(); baos.close(); 104 bytes = baos.toByteArray(); 105 initData(bytes); 106 } 107 108/** 109 * The URL constructor opens an ABI file from any URL. 110 * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file. 111 * @throws IOException if there is a problem reading from the URL. 112 * @throws IllegalArgumentException if the URL does not contain a valid ABI file. 113 */ 114 public ABITrace( URL ABIFile ) throws IOException 115 { 116 byte[] bytes = null; 117 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 118 InputStream is = ABIFile.openStream(); 119 BufferedInputStream bis = new BufferedInputStream(is); 120 int b; 121 while ((b = bis.read()) >= 0) 122 { 123 baos.write(b); 124 } 125 bis.close(); is.close(); baos.close(); 126 bytes = baos.toByteArray(); 127 initData(bytes); 128 } 129 130/** 131 * The <code>byte[]</code> constructor parses an ABI file represented as a byte array. 132 * @throws IllegalArgumentException if the data does not represent a valid ABI file. 133 */ 134 public ABITrace(byte[] ABIFileData) 135 { 136 initData(ABIFileData); 137 } 138 139/** 140 * Returns the length of the sequence (number of bases) in this trace. 141 */ 142 public int getSequenceLength() { return SeqLength; } 143 144/** 145 * Returns the length of the trace (number of x-coordinate points in the graph). 146 */ 147 public int getTraceLength() { return TraceLength; } 148 149/** 150 * Returns an <code>int[]</code> array that represents the basecalls - each int in the 151 * array corresponds to an x-coordinate point in the graph that is a peak (a base location). 152 */ 153 public int[] getBasecalls() { return Basecalls; } 154 155 /** 156 * Returns an <code>int[]</code> array that represents the quality - each int in the 157 * array corresponds to an quality value 90-255) in the graph at a base location). 158 */ 159 public int[] getQcalls() { return Qcalls; } 160 161/** 162 * Returns the original programatically determined (unedited) sequence as a <code>SymbolList</code>. 163 */ 164 public SymbolList getSequence() throws BioError 165 { 166 try { 167 return DNATools.createDNA(sequence); 168 } 169 catch (IllegalSymbolException ise) { 170 // this should be impossible! 171 throw new BioError(ise); 172 } 173 } 174 175/** 176 * Returns one of the four traces - all of the y-coordinate values, 177 * each of which correspond to a single x-coordinate relative to the 178 * position in the array, so that if element 4 in the array is 972, then 179 * x is 4 and y is 972 for that point. 180 * 181 * @param base the DNA AttomicSymbol to retrieve the trace values for 182 * @return an array of ints giving the entire trace for that base 183 * @throws IllegalSymbolException if the base is not valid 184 */ 185 public int[] getTrace (AtomicSymbol base) throws IllegalSymbolException 186 { 187 if (base == DNATools.a()) { 188 return A; 189 } else if (base == DNATools.c()) { 190 return C; 191 } else if (base == DNATools.g()) { 192 return G; 193 } else if (base == DNATools.t()) { 194 return T; 195 } else { 196 DNATools.getDNA().validate(base); 197 throw new IllegalSymbolException("Don't know symbol: " + base); 198 } 199 } 200 201/** 202 * Returns a BufferedImage that represents the entire trace. The height can be set precisely in 203 * pixels, the width in pixels is determined by the scaling factor times the number 204 * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented 205 * in the returned image. 206 * 207 * @param imageHeight is the desired height of the image in pixels. 208 * @param widthScale indiates how many horizontal pixels to use to represent a single x-coordinate (try 2). 209 */ 210 public BufferedImage getImage(int imageHeight, int widthScale) 211 { 212 BufferedImage out = new BufferedImage(TraceLength * widthScale, imageHeight, BufferedImage.TYPE_BYTE_INDEXED); 213 Graphics2D g = out.createGraphics(); 214 Color acolor = Color.green.darker(); 215 Color ccolor = Color.blue; 216 Color gcolor = Color.black; 217 Color tcolor = Color.red; 218 Color ncolor = Color.pink; 219 double scale = calculateScale(imageHeight); 220 int[] bc = Basecalls; 221 char[] seq = sequence.toCharArray(); 222 g.setBackground(Color.white); 223 g.clearRect(0, 0, TraceLength * widthScale, imageHeight); 224 int here = 0; 225 int basenum = 0; 226 for (int q = 1; q <= 5; q++) 227 { 228 for (int x = 0; x <= TraceLength - 2; x++) 229 { 230 if (q==1) 231 { 232 g.setColor(acolor); 233 g.drawLine(2*x, transmute(A[x], imageHeight, scale), 234 2*(x + 1), transmute(A[x+1], imageHeight, scale)); 235 } 236 if (q==2) 237 { 238 g.setColor(ccolor); 239 g.drawLine(2*x, transmute(C[x], imageHeight, scale), 240 2*(x + 1), transmute(C[x+1], imageHeight, scale)); 241 } 242 if (q==3) 243 { 244 g.setColor(tcolor); 245 g.drawLine(2*x, transmute(T[x], imageHeight, scale), 246 2*(x + 1), transmute(T[x+1], imageHeight, scale)); 247 } 248 if (q==4) 249 { 250 g.setColor(gcolor); 251 g.drawLine(2*x, transmute(G[x], imageHeight, scale), 252 2*(x + 1), transmute(G[x+1], imageHeight, scale)); 253 } 254 if (q==5) 255 { 256 if ((here > bc.length-1) || (basenum > seq.length-1)) break; 257 if (bc[here] == x) 258 { 259 g.drawLine(2*x, transmute(-2, imageHeight, 1.0), 260 2*x, transmute(-7, imageHeight, 1.0)); 261 if ((basenum+1)%10 == 0) //if the basecount is divisible by ten 262 //add a number 263 { 264 g.drawLine(2*x, transmute(-20, imageHeight, 1.0), 265 2*x, transmute(-25, imageHeight, 1.0)); 266 g.drawString(Integer.toString(basenum+1), 267 2*x-3, transmute(-36, imageHeight, 1.0)); 268 } 269 switch (seq[basenum]) 270 { 271 case 'A': case 'a': g.setColor(acolor); break; 272 case 'C': case 'c': g.setColor(ccolor); break; 273 case 'G': case 'g': g.setColor(gcolor); break; 274 case 'T': case 't': g.setColor(tcolor); break; 275 default: g.setColor(ncolor); 276 } 277 g.drawChars(seq, basenum, 1, 278 2*x-3, transmute(-18, imageHeight, 1.0)); 279 g.setColor(Color.black); 280 here++; basenum++; 281 } 282 } 283 } 284 } 285 return out; 286 } 287 288/** 289 * Initialize all of the data fields for this object. 290 * @throws IllegalArgumentException which will propagate to all of the constructors. 291 */ 292 private void initData(byte[] fileData) 293 { 294 TraceData = fileData; 295 if (isABI()) 296 { 297 setIndex(); 298 setBasecalls(); 299 setQcalls(); 300 setSeq(); 301 setTraces(); 302 } 303 else throw new IllegalArgumentException("Not a valid ABI file."); 304 } 305 306/** 307 * A utility method which fills array b with data from the trace starting at traceDataOffset. 308 */ 309 private void getSubArray(byte[] b, int traceDataOffset) 310 { 311 for (int x=0; x<=b.length-1; x++) 312 { 313 b[x] = TraceData[traceDataOffset + x]; 314 } 315 } 316 317/** 318 * Shuffle the pointers to point to the proper spots in the trace, then load the 319 * traces into their arrays. 320 */ 321 private void setTraces() 322 { 323 int pointers[] = new int[4]; //alphabetical, 0=A, 1=C, 2=G, 3=T 324 int datas[] = new int[4]; 325 char order[] = new char[4]; 326 327 datas[0] = DATA9; 328 datas[1] = DATA10; 329 datas[2] = DATA11; 330 datas[3] = DATA12; 331 332 for (int i=0; i<=3; i++) 333 { 334 order[i]=(char) TraceData[FWO+i]; 335 } 336 337 for (int i=0; i <=3; i++) 338 { 339 switch (order[i]) 340 { 341 case 'A': case 'a': 342 pointers[0] = datas[i]; 343 break; 344 case 'C': case 'c': 345 pointers[1] = datas[i]; 346 break; 347 case 'G': case 'g': 348 pointers[2] = datas[i]; 349 break; 350 case 'T': case 't': 351 pointers[3] = datas[i]; 352 break; 353 default: 354 throw new IllegalArgumentException("Trace contains illegal values."); 355 } 356 } 357 358 A = new int[TraceLength]; 359 C = new int[TraceLength]; 360 G = new int[TraceLength]; 361 T = new int[TraceLength]; 362 363 for (int i=0; i <=3; i++) 364 { 365 byte[] qq = new byte[TraceLength*2]; 366 getSubArray(qq, pointers[i]); 367 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq)); 368 for (int x=0; x <=TraceLength - 1; x++) 369 { 370 try 371 { 372 if (i == 0) A[x] = (int) dis.readShort(); 373 if (i == 1) C[x] = (int) dis.readShort(); 374 if (i == 2) G[x] = (int) dis.readShort(); 375 if (i == 3) T[x] = (int) dis.readShort(); 376 }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong. 377 { 378 throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams."); 379 } 380 } 381 } 382 return; 383 } 384 385/** 386 * Fetch the sequence from the trace data. 387 */ 388 private void setSeq() 389 { 390 char tempseq[] = new char[SeqLength]; 391 for (int x = 0; x <= SeqLength - 1; ++x) 392 { 393 tempseq[x] = (char) TraceData[PBAS2 + x]; 394 } 395 sequence = new String (tempseq); 396 } 397 398 399/** 400 * Fetch the basecalls from the trace data. 401 */ 402 private void setBasecalls() 403 { 404 Basecalls = new int[SeqLength]; 405 byte[] qq = new byte[SeqLength*2]; 406 getSubArray(qq, PLOC); 407 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq)); 408 for (int i = 0; i <= SeqLength -1; ++i) 409 { 410 try 411 { 412 Basecalls[i]=(int) dis.readShort(); 413 }catch(IOException e)//This shouldn't happen. If it does something must be seriously wrong. 414 { 415 throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams."); 416 } 417 } 418 } 419 420 /** 421 * Fetch the quality calls from the trace data. 422 */ 423 private void setQcalls() { 424 Qcalls = new int[SeqLength]; 425 byte[] qq = new byte[SeqLength]; 426 getSubArray(qq, PCON); 427 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(qq)); 428 for (int i = 0; i <= SeqLength - 1; ++i) { 429 try { 430 Qcalls[i] = (int) dis.readByte(); 431 } catch (IOException e)//This shouldn't happen. If it does something must be seriously wrong. 432 { 433 throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams."); 434 } 435 } 436 } 437 438/** 439 * Utility method to return an int beginning at <code>pointer</code> in the TraceData array. 440 */ 441 private int getIntAt(int pointer) 442 { 443 int out = 0; 444 byte[] temp = new byte[4]; 445 getSubArray(temp, pointer); 446 try 447 { 448 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(temp)); 449 out = dis.readInt(); 450 }catch(IOException e) //This shouldn't happen. If it does something must be seriously wrong. 451 { 452 throw new IllegalStateException("Unexpected IOException encountered while manipulating internal streams."); 453 } 454 return out; 455 } 456 457/** 458 * Utility method to translate y coordinates from graph space (where up is greater) 459 * to image space (where down is greater). 460 */ 461 private int transmute(int ya, int height, double scale) 462 { 463 return (height - 45 - (int) (ya * scale)); 464 } 465 466/** 467 * Get the maximum height of any of the traces. The data is persisted for performance 468 * in the event of multiple calls, but it initialized lazily. 469 */ 470 private int getMaximum() 471 { 472 if (maximum > 0) return maximum; 473 int max = 0; 474 for (int x=0; x<=T.length-1; x++) 475 { 476 if (T[x] > max) max = T[x]; 477 if (A[x] > max) max = A[x]; 478 if (C[x] > max) max = C[x]; 479 if (G[x] > max) max = G[x]; 480 } 481 return max; 482 } 483 484 //calculates the necessary scaling to allow the trace to fit vertically 485 //in the space specified. 486/** 487 * Returns the scaling factor necessary to allow all of the traces to fit vertically 488 * into the specified space. 489 * @param <code>height</code> - the required height in pixels. 490 */ 491 private double calculateScale(int height) 492 { 493 double newScale = 0.0; 494 double max = (double)getMaximum(); 495 double ht = (double)height; 496 newScale = ((ht - 50.0))/max; 497 return newScale; 498 } 499 500/** 501 * Sets up all of the initial pointers to the important records in TraceData. 502 */ 503 private void setIndex() 504 { 505 int DataCounter, PBASCounter, PLOCCounter, PCONCounter, NumRecords; 506 byte[] RecNameArray = new byte[4]; 507 String RecName; 508 509 DataCounter = 0; PBASCounter = 0; PLOCCounter = 0; PCONCounter = 0; 510 511 IndexBase = getIntAt(AbsIndexBase + MacJunk); 512 NumRecords = getIntAt(AbsIndexBase - 8 + MacJunk); 513 514 for (int record = 0; record <= NumRecords - 1; record++) 515 { 516 getSubArray(RecNameArray, (IndexBase + (record * 28))); 517 RecName = new String (RecNameArray); 518 if (RecName.equals("FWO_")) 519 FWO = IndexBase + (record * 28) + 20; 520 if (RecName.equals("DATA")) 521 { 522 ++DataCounter; 523 if (DataCounter == 9) 524 DATA9 = IndexBase + (record * 28) + 20; 525 if (DataCounter == 10) 526 DATA10 = IndexBase + (record * 28) + 20; 527 if (DataCounter == 11) 528 DATA11 = IndexBase + (record * 28) + 20; 529 if (DataCounter == 12) 530 DATA12 = IndexBase + (record * 28) + 20; 531 } 532 if (RecName.equals("PBAS")) 533 { 534 ++PBASCounter; 535 if (PBASCounter == 2) 536 PBAS2 = IndexBase + (record * 28) + 20; 537 } 538 if (RecName.equals("PLOC")) 539 { 540 ++PLOCCounter; 541 if (PLOCCounter == 2) 542 PLOC = IndexBase + (record * 28) + 20; 543 } 544 if (RecName.equals("PCON")) 545 { 546 ++PCONCounter; 547 if (PCONCounter == 2) 548 PCON = IndexBase + (record * 28) + 20; 549 } 550 551 } //next record 552 TraceLength = getIntAt(DATA12 - 8); 553 SeqLength = getIntAt(PBAS2-4); 554 PLOC = getIntAt(PLOC) + MacJunk; 555 DATA9 = getIntAt(DATA9) + MacJunk; 556 DATA10 = getIntAt(DATA10) + MacJunk; 557 DATA11 = getIntAt(DATA11) + MacJunk; 558 DATA12 = getIntAt(DATA12) + MacJunk; 559 PBAS2 = getIntAt(PBAS2) + MacJunk; 560 PCON = getIntAt(PCON) + MacJunk; 561 } 562 563/** 564 * Test to see if the file is ABI format by checking to see that the first three bytes 565 * are "ABI". Also handle the special case where 128 bytes were prepended to the file 566 * due to binary FTP from an older macintosh system. 567 */ 568 private boolean isABI() 569 { 570 char ABI[] = new char[4]; 571 572 for (int i=0; i<=2; i++) 573 { 574 ABI[i]=(char) TraceData[i]; 575 } 576 if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I')) 577 { 578 return true; 579 } 580 else 581 { 582 for (int i=128; i<=130; i++) 583 { 584 ABI[i]=(char) TraceData[i]; 585 } 586 if (ABI[0] == 'A' && (ABI[1] == 'B' && ABI[2] == 'I')) 587 { 588 MacJunk=128; 589 return true; 590 } 591 else 592 return false; 593 } 594 } 595} 596