/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.program.abi;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.biojava.bio.BioError;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.SymbolList;


/**
 * Title: ABITrace<br><br>
 * ABITrace is a class for managing ABI file information,
 * it is capable of opening an ABI file and storing
 * the most important fields, which can be recalled as simple java types. It can also return
 * an image corresponding to the trace.
 * It has three constructors with input types <code>File, URL, and byte[]</code>.<br><br>
 * ABI files contain two sets of basecall and sequence data, one that was originally
 * created programmatically and the other, which is an editable copy. This version of this object
 * only references the original unedited data.<br>
 *
 * Copyright (c) 2001
 * @author David H. Klatte, Ph.D.
 * @author Matthew Pocock
 * @version 0.5alpha
 */
public class ABITrace
{

    //the next three lines are the important persistent data
    private String sequence;
    private int A[], G[], C[], T[], Basecalls[];
    private int TraceLength, SeqLength;

    //This is the actual file data.
    private byte[] TraceData;

    //Cached result of getMaximum(); 0 means "not computed yet".
    private int maximum = 0;

    //the next declaration lines comprise the file index information
    private int MacJunk = 0; //sometimes when macintosh files are
                             //FTPed in binary form, they have 128 bytes
                             //of junk pre-pended to them. This offset
                             //allows ABITrace to handle that in a way that
                             //is invisible to the user.
    private static final int AbsIndexBase = 26; //The file location of the Index pointer
    private static final int INDEX_ENTRY_SIZE = 28;  //bytes per index record
    private static final int ENTRY_VALUE_OFFSET = 20; //offset of a record's value/pointer field
    private static final int MAC_PREFIX_LENGTH = 128;
    private int IndexBase, PLOC;

    //the next declaration is for the actual file pointers
    private int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO;

    /**
     * The File constructor opens a local ABI file and parses the content.
     * @param ABIFile is a <code>java.io.File</code> on the local file system.
     * @throws IOException if there is a problem reading the file.
     * @throws IllegalArgumentException if the file is not a valid ABI file.
     */
    public ABITrace( File ABIFile ) throws IOException
    {
        initData(readFully(new FileInputStream(ABIFile)));
    }

    /**
     * The URL constructor opens an ABI file from any URL.
     * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file.
     * @throws IOException if there is a problem reading from the URL.
     * @throws IllegalArgumentException if the URL does not contain a valid ABI file.
     */
    public ABITrace( URL ABIFile ) throws IOException
    {
        initData(readFully(ABIFile.openStream()));
    }

    /**
     * The <code>byte[]</code> constructor parses an ABI file represented as a byte array.
     * The array is kept by reference, not copied.
     * @param ABIFileData the complete content of an ABI trace file.
     * @throws IllegalArgumentException if the data does not represent a valid ABI file.
     */
    public ABITrace(byte[] ABIFileData)
    {
        initData(ABIFileData);
    }

    /**
     * Returns the length of the sequence (number of bases) in this trace.
     */
    public int getSequenceLength() { return SeqLength; }

    /**
     * Returns the length of the trace (number of x-coordinate points in the graph).
     */
    public int getTraceLength() { return TraceLength; }

    /**
     * Returns an <code>int[]</code> array that represents the basecalls - each int in the
     * array corresponds to an x-coordinate point in the graph that is a peak (a base location).
     * The returned array is the internal one, not a copy.
     */
    public int[] getBasecalls() { return Basecalls; }

    /**
     * Returns the original programmatically determined (unedited) sequence as a <code>SymbolList</code>.
     *
     * @throws BioError if the stored sequence cannot be converted to DNA symbols.
     */
    public SymbolList getSequence() throws BioError
    {
        try {
            return DNATools.createDNA(sequence);
        }
        catch (IllegalSymbolException ise) {
            // this should be impossible!
            throw new BioError(ise);
        }
    }

    /**
     * Returns one of the four traces - all of the y-coordinate values,
     * each of which correspond to a single x-coordinate relative to the
     * position in the array, so that if element 4 in the array is 972, then
     * x is 4 and y is 972 for that point.
     * The returned array is the internal one, not a copy.
     *
     * @param base the DNA AtomicSymbol to retrieve the trace values for
     * @return an array of ints giving the entire trace for that base
     * @throws IllegalSymbolException if the base is not valid
     */
    public int[] getTrace (AtomicSymbol base) throws IllegalSymbolException
    {
        if (base == DNATools.a()) {
            return A;
        } else if (base == DNATools.c()) {
            return C;
        } else if (base == DNATools.g()) {
            return G;
        } else if (base == DNATools.t()) {
            return T;
        } else {
            // validate() reports symbols outside the DNA alphabet; anything that
            // passes validation but is not a/c/g/t is rejected explicitly below.
            DNATools.getDNA().validate(base);
            throw new IllegalSymbolException("Don't know symbol: " + base);
        }
    }

    /**
     * Returns a BufferedImage that represents the entire trace. The height can be set precisely in
     * pixels, the width in pixels is determined by the scaling factor times the number
     * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented
     * in the returned image. The traces are drawn in the order A, C, T, G; tick marks, base
     * letters and every tenth base number are drawn in the strip below the traces.
     *
     * @param imageHeight is the desired height of the image in pixels.
     * @param widthScale indicates how many horizontal pixels to use to represent a single x-coordinate (try 2).
     * @return the rendered trace image.
     */
    public BufferedImage getImage(int imageHeight, int widthScale)
    {
        int imageWidth = TraceLength * widthScale;
        BufferedImage out = new BufferedImage(imageWidth, imageHeight, BufferedImage.TYPE_BYTE_INDEXED);
        Graphics2D g = out.createGraphics();
        try
        {
            Color acolor = Color.green.darker();
            Color ccolor = Color.blue;
            Color gcolor = Color.black;
            Color tcolor = Color.red;
            Color ncolor = Color.pink;
            double scale = calculateScale(imageHeight);

            g.setBackground(Color.white);
            g.clearRect(0, 0, imageWidth, imageHeight);

            drawTrace(g, acolor, A, imageHeight, widthScale, scale);
            drawTrace(g, ccolor, C, imageHeight, widthScale, scale);
            drawTrace(g, tcolor, T, imageHeight, widthScale, scale);
            drawTrace(g, gcolor, G, imageHeight, widthScale, scale);

            int[] bc = Basecalls;
            char[] seq = sequence.toCharArray();
            int basenum = 0;
            g.setColor(Color.black); // tick marks and numbers are always black
            for (int x = 0; x <= TraceLength - 2; x++)
            {
                if (basenum > bc.length - 1 || basenum > seq.length - 1) break;
                if (bc[basenum] != x) continue;

                // Horizontal position honours widthScale (previously hard-coded to 2).
                int px = widthScale * x;
                g.drawLine(px, transmute(-2, imageHeight, 1.0),
                           px, transmute(-7, imageHeight, 1.0));
                if ((basenum + 1) % 10 == 0) //if the basecount is divisible by ten
                                             //add a number
                {
                    g.drawLine(px, transmute(-20, imageHeight, 1.0),
                               px, transmute(-25, imageHeight, 1.0));
                    g.drawString(Integer.toString(basenum + 1),
                                 px - 3, transmute(-36, imageHeight, 1.0));
                }
                switch (seq[basenum])
                {
                    case 'A': case 'a': g.setColor(acolor); break;
                    case 'C': case 'c': g.setColor(ccolor); break;
                    case 'G': case 'g': g.setColor(gcolor); break;
                    case 'T': case 't': g.setColor(tcolor); break;
                    default: g.setColor(ncolor);
                }
                g.drawChars(seq, basenum, 1,
                            px - 3, transmute(-18, imageHeight, 1.0));
                g.setColor(Color.black);
                basenum++;
            }
        }
        finally
        {
            g.dispose();
        }
        return out;
    }

    /**
     * Draws a single trace as a poly-line in the given colour.
     */
    private void drawTrace(Graphics2D g, Color color, int[] trace,
                           int imageHeight, int widthScale, double scale)
    {
        g.setColor(color);
        for (int x = 0; x <= TraceLength - 2; x++)
        {
            g.drawLine(widthScale * x, transmute(trace[x], imageHeight, scale),
                       widthScale * (x + 1), transmute(trace[x + 1], imageHeight, scale));
        }
    }

    /**
     * Reads the stream to its end and returns everything read. The stream is always
     * closed, whether or not reading succeeds.
     */
    private static byte[] readFully(InputStream in) throws IOException
    {
        try
        {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int count;
            while ((count = in.read(buffer)) != -1)
            {
                baos.write(buffer, 0, count);
            }
            return baos.toByteArray();
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Initialize all of the data fields for this object.
     * @throws IllegalArgumentException which will propagate to all of the constructors.
     */
    private void initData(byte[] fileData)
    {
        TraceData = fileData;
        if (!isABI())
        {
            throw new IllegalArgumentException("Not a valid ABI file.");
        }
        // Order matters: setIndex() locates the records and lengths the others rely on.
        setIndex();
        setBasecalls();
        setSeq();
        setTraces();
    }

    /**
     * A utility method which fills array b with data from the trace starting at traceDataOffset.
     */
    private void getSubArray(byte[] b, int traceDataOffset)
    {
        System.arraycopy(TraceData, traceDataOffset, b, 0, b.length);
    }

    /**
     * Reads <code>count</code> consecutive big-endian signed 16 bit values from the
     * trace data, starting at <code>offset</code>, and widens them to ints.
     */
    private int[] readShorts(int offset, int count)
    {
        int[] values = new int[count];
        byte[] raw = new byte[count * 2];
        getSubArray(raw, offset);
        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(raw));
        try
        {
            for (int i = 0; i < count; i++)
            {
                values[i] = dis.readShort();
            }
        }
        catch (IOException e) //This shouldn't happen. If it does something must be seriously wrong.
        {
            throw internalStreamFailure(e);
        }
        return values;
    }

    /**
     * Builds the exception reported when an in-memory stream unexpectedly fails,
     * keeping the original IOException as the cause.
     */
    private static IllegalStateException internalStreamFailure(IOException cause)
    {
        IllegalStateException failure = new IllegalStateException(
            "Unexpected IOException encountered while manipulating internal streams.");
        failure.initCause(cause);
        return failure;
    }

    /**
     * Tests whether the bytes of the trace data starting at <code>offset</code> are exactly
     * the characters of <code>tag</code>. Returns false if the range lies outside the data.
     */
    private boolean hasTagAt(int offset, String tag)
    {
        if (offset < 0 || offset + tag.length() > TraceData.length)
        {
            return false;
        }
        for (int i = 0; i < tag.length(); i++)
        {
            if (TraceData[offset + i] != (byte) tag.charAt(i))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Shuffle the pointers to point to the proper spots in the trace, then load the
     * traces into their arrays. The four bytes at FWO name the base (A, C, G or T)
     * that each of the four data records DATA9..DATA12 belongs to, in that order.
     *
     * @throws IllegalArgumentException if the base order contains anything but A, C, G or T.
     */
    private void setTraces()
    {
        int pointers[] = new int[4]; //alphabetical, 0=A, 1=C, 2=G, 3=T
        int datas[] = { DATA9, DATA10, DATA11, DATA12 };

        for (int i = 0; i <= 3; i++)
        {
            switch ((char) TraceData[FWO + i])
            {
                case 'A': case 'a':
                    pointers[0] = datas[i];
                    break;
                case 'C': case 'c':
                    pointers[1] = datas[i];
                    break;
                case 'G': case 'g':
                    pointers[2] = datas[i];
                    break;
                case 'T': case 't':
                    pointers[3] = datas[i];
                    break;
                default:
                    throw new IllegalArgumentException("Trace contains illegal values.");
            }
        }

        A = readShorts(pointers[0], TraceLength);
        C = readShorts(pointers[1], TraceLength);
        G = readShorts(pointers[2], TraceLength);
        T = readShorts(pointers[3], TraceLength);
    }

    /**
     * Fetch the sequence from the trace data: one byte per base, starting at PBAS2.
     */
    private void setSeq()
    {
        char tempseq[] = new char[SeqLength];
        for (int x = 0; x <= SeqLength - 1; ++x)
        {
            tempseq[x] = (char) TraceData[PBAS2 + x];
        }
        sequence = new String (tempseq);
    }

    /**
     * Fetch the basecalls from the trace data: one 16 bit value per base, starting at PLOC.
     */
    private void setBasecalls()
    {
        Basecalls = readShorts(PLOC, SeqLength);
    }

    /**
     * Utility method to return an int beginning at <code>pointer</code> in the TraceData array.
     * The four bytes are interpreted as a big-endian signed value.
     */
    private int getIntAt(int pointer)
    {
        byte[] temp = new byte[4];
        getSubArray(temp, pointer);
        try
        {
            return new DataInputStream(new ByteArrayInputStream(temp)).readInt();
        }
        catch (IOException e) //This shouldn't happen. If it does something must be seriously wrong.
        {
            throw internalStreamFailure(e);
        }
    }

    /**
     * Utility method to translate y coordinates from graph space (where up is greater)
     * to image space (where down is greater). Graph y = 0 lies 45 pixels above the
     * bottom of the image; negative graph values fall in the strip below it.
     */
    private int transmute(int ya, int height, double scale)
    {
        return (height - 45 - (int) (ya * scale));
    }

    /**
     * Get the maximum height of any of the traces. The data is persisted for performance
     * in the event of multiple calls, but it is initialized lazily.
     */
    private int getMaximum()
    {
        if (maximum > 0) return maximum;
        int max = 0;
        for (int x = 0; x <= T.length - 1; x++)
        {
            if (T[x] > max) max = T[x];
            if (A[x] > max) max = A[x];
            if (C[x] > max) max = C[x];
            if (G[x] > max) max = G[x];
        }
        maximum = max; // remember the result so later calls skip the scan
        return maximum;
    }

    /**
     * Returns the scaling factor necessary to allow all of the traces to fit vertically
     * into the specified space: <code>(height - 50) / maximum trace value</code>.
     * @param height the required height in pixels.
     */
    private double calculateScale(int height)
    {
        double max = (double) getMaximum();
        double ht = (double) height;
        return (ht - 50.0) / max;
    }

    /**
     * Sets up all of the initial pointers to the important records in TraceData.
     * Walks the index, remembering the FWO_ record, the 9th to 12th DATA records and the
     * second PBAS and PLOC records, then resolves the lengths and data locations from them.
     */
    private void setIndex()
    {
        int DataCounter = 0, PBASCounter = 0, PLOCCounter = 0;

        // Offsets stored in the file are relative to the start of the ABI data, so the
        // index location has to be shifted past any prepended Mac header, exactly as
        // the data pointers resolved at the end of this method are.
        IndexBase = getIntAt(AbsIndexBase + MacJunk) + MacJunk;
        int NumRecords = getIntAt(AbsIndexBase - 8 + MacJunk);

        for (int record = 0; record <= NumRecords - 1; record++)
        {
            int entry = IndexBase + (record * INDEX_ENTRY_SIZE);
            int valueField = entry + ENTRY_VALUE_OFFSET;

            if (hasTagAt(entry, "FWO_"))
            {
                FWO = valueField;
            }
            else if (hasTagAt(entry, "DATA"))
            {
                ++DataCounter;
                if (DataCounter == 9)
                    DATA9 = valueField;
                if (DataCounter == 10)
                    DATA10 = valueField;
                if (DataCounter == 11)
                    DATA11 = valueField;
                if (DataCounter == 12)
                    DATA12 = valueField;
            }
            else if (hasTagAt(entry, "PBAS"))
            {
                ++PBASCounter;
                if (PBASCounter == 2)
                    PBAS2 = valueField;
            }
            else if (hasTagAt(entry, "PLOC"))
            {
                ++PLOCCounter;
                if (PLOCCounter == 2)
                    PLOC = valueField;
            }
        } //next record

        // The lengths live in the index records themselves, just before the value field.
        TraceLength = getIntAt(DATA12 - 8);
        SeqLength = getIntAt(PBAS2 - 4);

        // Replace each "location of the pointer" with the location it points to.
        PLOC = getIntAt(PLOC) + MacJunk;
        DATA9 = getIntAt(DATA9) + MacJunk;
        DATA10 = getIntAt(DATA10) + MacJunk;
        DATA11 = getIntAt(DATA11) + MacJunk;
        DATA12 = getIntAt(DATA12) + MacJunk;
        PBAS2 = getIntAt(PBAS2) + MacJunk;
    }

    /**
     * Test to see if the file is ABI format by checking to see that the first three bytes
     * are "ABI". Also handle the special case where 128 bytes were prepended to the file
     * due to binary FTP from an older macintosh system; in that case MacJunk is set to 128.
     * Data too short to hold the marker is reported as not being ABI.
     */
    private boolean isABI()
    {
        if (hasTagAt(0, "ABI"))
        {
            return true;
        }
        if (hasTagAt(MAC_PREFIX_LENGTH, "ABI"))
        {
            MacJunk = MAC_PREFIX_LENGTH;
            return true;
        }
        return false;
    }
}