/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 05-04-2018
 */

package org.biojava.nbio.core.sequence.io;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.compound.ABITracerCompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
import org.biojava.nbio.core.sequence.template.AbstractSequence;

/**
 * ABITrace manages the information held in an ABI trace file.
 * <p>
 * It parses an ABI file and stores the most important fields, which can be recalled as
 * simple Java types. It can also return an image corresponding to the trace.
 * It has three constructors with input types <code>File, URL, and byte[]</code>.
 * <p>
 * ABI files contain two sets of basecall and sequence data, one that was originally
 * created programmatically and the other, which is an editable copy. This version of this
 * object only references the original unedited data.
 */
public class ABITrace {

    /** Position, relative to the start of the ABI header, of the pointer to the record index. */
    private static final int ABS_INDEX_BASE = 26;

    /** Distance in bytes between two consecutive records of the index. */
    private static final int INDEX_RECORD_SIZE = 28;

    /** Position inside an index record of the field holding the data (or the pointer to it). */
    private static final int RECORD_DATA_FIELD = 20;

    /** Number of bytes that may be prepended to files FTPed in binary form from a Macintosh. */
    private static final int MAC_JUNK_LENGTH = 128;

    /** Size of the chunks used when copying an input stream into memory. */
    private static final int READ_BUFFER_SIZE = 8192;

    private static final String INVALID_FILE = "Not a valid ABI file.";
    private static final String STREAM_FAILURE =
            "Unexpected IOException encountered while manipulating internal streams.";

    // Colours used by getImage.
    private static final Color A_COLOR = Color.green.darker();
    private static final Color C_COLOR = Color.blue;
    private static final Color G_COLOR = Color.black;
    private static final Color T_COLOR = Color.red;
    private static final Color N_COLOR = Color.pink;

    // The next three lines are the important persistent data.
    private String sequence;
    private int[] A, G, C, T, baseCalls, qCalls;
    private int traceLength, seqLength;

    // This is the actual file data.
    private byte[] traceData;

    // Sometimes when Macintosh files are FTPed in binary form, they have 128 bytes of junk
    // prepended to them. This offset allows ABITrace to handle that in a way that is
    // invisible to the user. It is 0 for a plain file and 128 for a prefixed one.
    private int macJunk = 0;

    // Absolute positions (in traceData) of the basecall-location and quality data.
    private int PLOC, PCON;

    // Absolute positions (in traceData) of the four trace data sets, the sequence,
    // and the four bytes that give the base order of the trace data sets.
    private int DATA9, DATA10, DATA11, DATA12, PBAS2, FWO;

    /**
     * The File constructor opens a local ABI file and parses the content.
     *
     * @param ABIFile is a <code>java.io.File</code> on the local file system.
     * @throws IOException if there is a problem reading the file.
     * @throws IllegalArgumentException if the file is not a valid ABI file.
     */
    public ABITrace(File ABIFile) throws IOException {
        // try-with-resources closes the file even when parsing fails.
        try (FileInputStream fis = new FileInputStream(ABIFile);
             BufferedInputStream bis = new BufferedInputStream(fis)) {
            ABITraceInit(bis);
        }
    }

    /**
     * The URL constructor opens an ABI file from any URL.
     *
     * @param ABIFile is a <code>java.net.URL</code> for an ABI trace file.
     * @throws IOException if there is a problem reading from the URL.
     * @throws IllegalArgumentException if the URL does not contain a valid ABI file.
     */
    public ABITrace(URL ABIFile) throws IOException {
        // try-with-resources closes the connection even when parsing fails.
        try (InputStream is = ABIFile.openStream();
             BufferedInputStream bis = new BufferedInputStream(is)) {
            ABITraceInit(bis);
        }
    }

    /**
     * The <code>byte[]</code> constructor parses an ABI file represented as a byte array.
     * The array is used as is (it is not copied).
     *
     * @param ABIFileData - byte array
     * @throws IllegalArgumentException if the data is null or does not represent a valid ABI file.
     */
    public ABITrace(byte[] ABIFileData) {
        initData(ABIFileData);
    }

    /**
     * Helper method for the stream based constructors: reads the stream to its end and
     * parses the bytes. The stream is not closed here; the caller owns it.
     *
     * @param bis - BufferedInputStream
     * @throws IOException if there is a problem reading from the BufferedInputStream
     */
    private void ABITraceInit(BufferedInputStream bis) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] chunk = new byte[READ_BUFFER_SIZE];
        int count;
        while ((count = bis.read(chunk)) != -1) {
            baos.write(chunk, 0, count);
        }
        initData(baos.toByteArray());
    }

    /**
     * Returns the length of the sequence (number of bases) in this trace.
     *
     * @return int seqLength
     */
    public int getSequenceLength() {
        return seqLength;
    }

    /**
     * Returns the length of the trace (number of x-coordinate points in the graph).
     *
     * @return int traceLength
     */
    public int getTraceLength() {
        return traceLength;
    }

    /**
     * Returns an <code>int[]</code> array that represents the basecalls - each int in the
     * array corresponds to an x-coordinate point in the graph that is a peak (a base location).
     * The array returned is the one held by this object, not a copy.
     *
     * @return int[] Basecalls
     */
    public int[] getBasecalls() {
        return baseCalls;
    }

    /**
     * Returns an <code>int[]</code> array that represents the quality - each int in the
     * array is the quality value stored for the base at the same index. The values are read
     * from the file as signed bytes. The array returned is the one held by this object,
     * not a copy.
     *
     * @return int[] qCalls
     */
    public int[] getQcalls() {
        return qCalls;
    }

    /**
     * Returns the original programmatically determined (unedited) sequence as an
     * {@link AbstractSequence} of {@link NucleotideCompound}.
     *
     * @return sequence
     * @throws CompoundNotFoundException if the sequence cannot be built from the
     *         ABITracer compound set
     */
    public AbstractSequence<NucleotideCompound> getSequence() throws CompoundNotFoundException {
        DNASequenceCreator creator = new DNASequenceCreator(ABITracerCompoundSet.getABITracerCompoundSet());
        return creator.getSequence(sequence, 0);
    }

    /**
     * Returns one of the four traces - all of the y-coordinate values,
     * each of which correspond to a single x-coordinate relative to the
     * position in the array, so that if element 4 in the array is 972, then
     * x is 4 and y is 972 for that point.
     * The array returned is the one held by this object, not a copy.
     *
     * @param base - the DNA String to retrieve the trace values for;
     *             one of "A", "C", "G" or "T" (upper case)
     * @return an array of ints giving the entire trace for that base
     * @throws CompoundNotFoundException if the base is not valid
     */
    public int[] getTrace(String base) throws CompoundNotFoundException {
        if ("A".equals(base)) {
            return A;
        } else if ("C".equals(base)) {
            return C;
        } else if ("G".equals(base)) {
            return G;
        } else if ("T".equals(base)) {
            return T;
        } else {
            throw new CompoundNotFoundException("Don't know base: " + base);
        }
    }

    /**
     * Returns a BufferedImage that represents the entire trace. The height can be set precisely in
     * pixels, the width in pixels is determined by the scaling factor times the number
     * of points in the trace (<code>getTraceLength()</code>). The entire trace is represented
     * in the returned image.
     *
     * @param imageHeight - desired height of the image in pixels.
     * @param widthScale - how many horizontal pixels to use to represent a single x-coordinate (try 2).
     * @return BufferedImage image
     */
    public BufferedImage getImage(int imageHeight, int widthScale) {
        BufferedImage out = new BufferedImage(traceLength * widthScale, imageHeight, BufferedImage.TYPE_BYTE_INDEXED);
        Graphics2D g = out.createGraphics();
        try {
            double scale = calculateScale(imageHeight);
            g.setBackground(Color.white);
            g.clearRect(0, 0, traceLength * widthScale, imageHeight);
            // Painting order matters where the curves overlap: A, C, T, then G on top.
            drawTrace(g, A, A_COLOR, imageHeight, widthScale, scale);
            drawTrace(g, C, C_COLOR, imageHeight, widthScale, scale);
            drawTrace(g, T, T_COLOR, imageHeight, widthScale, scale);
            drawTrace(g, G, G_COLOR, imageHeight, widthScale, scale);
            drawBaseCalls(g, imageHeight, widthScale);
        } finally {
            // Release the graphics context once drawing is finished or has failed.
            g.dispose();
        }
        return out;
    }

    /**
     * Draws one trace as a poly-line joining each point to the next one.
     *
     * @param g - graphics context of the image
     * @param trace - y-coordinate values of the trace
     * @param color - colour of the curve
     * @param imageHeight - height of the image in pixels
     * @param widthScale - horizontal pixels per x-coordinate
     * @param scale - vertical scaling factor
     */
    private void drawTrace(Graphics2D g, int[] trace, Color color, int imageHeight, int widthScale, double scale) {
        g.setColor(color);
        for (int x = 0; x <= traceLength - 2; x++) {
            g.drawLine(widthScale * x, transmute(trace[x], imageHeight, scale),
                    widthScale * (x + 1), transmute(trace[x + 1], imageHeight, scale));
        }
    }

    /**
     * Draws the annotations below the traces: a tick and the base letter at every base
     * location, plus a longer tick and the base number at every tenth base.
     * The x-coordinates are scanned in increasing order and the next base is drawn only
     * when its location equals the current x-coordinate; the last x-coordinate of the
     * trace is not scanned.
     *
     * @param g - graphics context of the image
     * @param imageHeight - height of the image in pixels
     * @param widthScale - horizontal pixels per x-coordinate
     */
    private void drawBaseCalls(Graphics2D g, int imageHeight, int widthScale) {
        char[] seq = sequence.toCharArray();
        int base = 0;
        g.setColor(Color.black);
        for (int x = 0; x <= traceLength - 2; x++) {
            if (base > baseCalls.length - 1 || base > seq.length - 1) {
                break;
            }
            if (baseCalls[base] != x) {
                continue;
            }
            int px = widthScale * x;
            g.drawLine(px, transmute(-2, imageHeight, 1.0), px, transmute(-7, imageHeight, 1.0));
            if ((base + 1) % 10 == 0) { // every tenth base also gets its number
                g.drawLine(px, transmute(-20, imageHeight, 1.0), px, transmute(-25, imageHeight, 1.0));
                g.drawString(Integer.toString(base + 1), px - 3, transmute(-36, imageHeight, 1.0));
            }
            g.setColor(colorOf(seq[base]));
            g.drawChars(seq, base, 1, px - 3, transmute(-18, imageHeight, 1.0));
            g.setColor(Color.black);
            base++;
        }
    }

    /**
     * Returns the colour used to draw a base letter.
     *
     * @param base - base letter, in either case
     * @return the colour of that base, or the "unknown" colour for any other character
     */
    private static Color colorOf(char base) {
        switch (base) {
            case 'A':
            case 'a':
                return A_COLOR;
            case 'C':
            case 'c':
                return C_COLOR;
            case 'G':
            case 'g':
                return G_COLOR;
            case 'T':
            case 't':
                return T_COLOR;
            default:
                return N_COLOR;
        }
    }

    /**
     * Utility method to translate y coordinates from graph space (where up is greater)
     * to image space (where down is greater).
     *
     * @param ya - y coordinate in graph space
     * @param height - height of the image in pixels
     * @param scale - vertical scaling factor
     * @return - translated y coordinates from graph space (where up is greater) to image space
     */
    private int transmute(int ya, int height, double scale) {
        return (height - 45 - (int) (ya * scale));
    }

    /**
     * Returns the scaling factor necessary to allow all of the traces to fit vertically
     * into the specified space.
     *
     * @param height - required height in pixels
     * @return - scaling factor; 1.0 when no trace has a positive value, which avoids a
     *         division by zero
     */
    private double calculateScale(int height) {
        int max = getMaximum();
        if (max <= 0) {
            return 1.0;
        }
        return (height - 50.0) / max;
    }

    /**
     * Get the maximum height of any of the traces. The value is computed on every call.
     *
     * @return - maximum height of any of the traces, or 0 if no value is positive
     */
    private int getMaximum() {
        int max = 0;
        for (int x = 0; x < T.length; x++) {
            max = Math.max(max, Math.max(Math.max(T[x], A[x]), Math.max(C[x], G[x])));
        }
        return max;
    }

    /**
     * Initialize all of the data fields for this object.
     *
     * @param fileData - data for object
     * @throws IllegalArgumentException which will propagate to all of the constructors.
     */
    private void initData(byte[] fileData) {
        if (fileData == null) {
            throw new IllegalArgumentException("ABI file data must not be null.");
        }
        traceData = fileData;
        if (!isABI()) {
            throw new IllegalArgumentException(INVALID_FILE);
        }
        setIndex();
        setBasecalls();
        setQcalls();
        setSeq();
        setTraces();
    }

    /**
     * Shuffle the pointers to point to the proper spots in the trace, then load the
     * traces into their arrays. The four bytes at FWO give the base that each of the
     * four data sets belongs to.
     *
     * @throws IllegalArgumentException if the base order is not a permutation of A, C, G and T
     */
    private void setTraces() {
        int[] pointers = {-1, -1, -1, -1}; // alphabetical, 0=A, 1=C, 2=G, 3=T
        int[] datas = {DATA9, DATA10, DATA11, DATA12};

        checkRange(FWO, datas.length);
        for (int i = 0; i < datas.length; i++) {
            switch ((char) traceData[FWO + i]) {
                case 'A':
                case 'a':
                    pointers[0] = datas[i];
                    break;
                case 'C':
                case 'c':
                    pointers[1] = datas[i];
                    break;
                case 'G':
                case 'g':
                    pointers[2] = datas[i];
                    break;
                case 'T':
                case 't':
                    pointers[3] = datas[i];
                    break;
                default:
                    throw new IllegalArgumentException("Trace contains illegal values.");
            }
        }
        for (int pointer : pointers) {
            // A base named twice leaves another one without any data set.
            if (pointer < 0) {
                throw new IllegalArgumentException("Trace contains illegal values.");
            }
        }

        A = readShorts(pointers[0], traceLength);
        C = readShorts(pointers[1], traceLength);
        G = readShorts(pointers[2], traceLength);
        T = readShorts(pointers[3], traceLength);
    }

    /**
     * Fetch the sequence from the trace data, one character per byte.
     */
    private void setSeq() {
        checkRange(PBAS2, seqLength);
        char[] bases = new char[seqLength];
        for (int x = 0; x < seqLength; x++) {
            bases[x] = (char) traceData[PBAS2 + x];
        }
        sequence = String.valueOf(bases);
    }

    /**
     * Fetch the quality calls from the trace data, one signed byte per base.
     */
    private void setQcalls() {
        checkRange(PCON, seqLength);
        qCalls = new int[seqLength];
        for (int i = 0; i < seqLength; i++) {
            qCalls[i] = traceData[PCON + i];
        }
    }

    /**
     * Fetch the basecalls from the trace data, one big-endian signed short per base.
     */
    private void setBasecalls() {
        baseCalls = readShorts(PLOC, seqLength);
    }

    /**
     * Sets up all of the initial pointers to the important records in TraceData.
     * The index is scanned for the FWO_ record, the 9th to 12th DATA records and the
     * second PBAS, PLOC and PCON records, counted in order of appearance.
     *
     * @throws IllegalArgumentException if one of these records is missing, or if a length
     *         or pointer read from the index is not consistent with the size of the data
     */
    private void setIndex() {
        int fwo = -1, data9 = -1, data10 = -1, data11 = -1, data12 = -1;
        int pbas2 = -1, ploc2 = -1, pcon2 = -1;
        int dataCounter = 0, pbasCounter = 0, plocCounter = 0, pconCounter = 0;

        // Pointers stored in the file do not count the optional 128 byte prefix, so it is
        // added to the index pointer exactly as it is added to every data pointer below.
        // NOTE(review): the previous code read the index without this shift, so a prefixed
        // file never matched any record name; confirm with a prefixed sample file.
        int indexBase = getIntAt(ABS_INDEX_BASE + macJunk) + macJunk;
        int numRecords = getIntAt(ABS_INDEX_BASE - 8 + macJunk);

        byte[] recNameArray = new byte[4];
        for (int record = 0; record < numRecords; record++) {
            int entry = indexBase + record * INDEX_RECORD_SIZE;
            getSubArray(recNameArray, entry);
            String recName = new String(recNameArray, StandardCharsets.US_ASCII);
            int dataField = entry + RECORD_DATA_FIELD;
            switch (recName) {
                case "FWO_":
                    fwo = dataField;
                    break;
                case "DATA":
                    switch (++dataCounter) {
                        case 9:
                            data9 = dataField;
                            break;
                        case 10:
                            data10 = dataField;
                            break;
                        case 11:
                            data11 = dataField;
                            break;
                        case 12:
                            data12 = dataField;
                            break;
                        default:
                            break;
                    }
                    break;
                case "PBAS":
                    if (++pbasCounter == 2) {
                        pbas2 = dataField;
                    }
                    break;
                case "PLOC":
                    if (++plocCounter == 2) {
                        ploc2 = dataField;
                    }
                    break;
                case "PCON":
                    if (++pconCounter == 2) {
                        pcon2 = dataField;
                    }
                    break;
                default:
                    break;
            }
        } //next record

        // The base order is stored in the record itself, so FWO keeps the record position.
        FWO = requireRecord(fwo, "FWO_");
        data9 = requireRecord(data9, "DATA (9th)");
        data10 = requireRecord(data10, "DATA (10th)");
        data11 = requireRecord(data11, "DATA (11th)");
        data12 = requireRecord(data12, "DATA (12th)");
        pbas2 = requireRecord(pbas2, "PBAS (2nd)");
        ploc2 = requireRecord(ploc2, "PLOC (2nd)");
        pcon2 = requireRecord(pcon2, "PCON (2nd)");

        // The lengths are stored 8 and 4 bytes before the data field of their record.
        traceLength = getIntAt(data12 - 8);
        seqLength = getIntAt(pbas2 - 4);
        if (traceLength < 0 || seqLength < 0) {
            throw new IllegalArgumentException("Not a valid ABI file: negative length in the record index.");
        }

        PLOC = getIntAt(ploc2) + macJunk;
        DATA9 = getIntAt(data9) + macJunk;
        DATA10 = getIntAt(data10) + macJunk;
        DATA11 = getIntAt(data11) + macJunk;
        DATA12 = getIntAt(data12) + macJunk;
        PBAS2 = getIntAt(pbas2) + macJunk;
        PCON = getIntAt(pcon2) + macJunk;
    }

    /**
     * Checks that a record was found while scanning the index.
     *
     * @param position - position recorded for the record, negative if it was never seen
     * @param recordName - name of the record, used in the error message
     * @return position, unchanged
     * @throws IllegalArgumentException if the record was not found
     */
    private static int requireRecord(int position, String recordName) {
        if (position < 0) {
            throw new IllegalArgumentException(
                    "Not a valid ABI file: missing required record " + recordName + ".");
        }
        return position;
    }

    /**
     * Utility method to return an int beginning at <code>pointer</code> in the TraceData array.
     * The four bytes are read in big-endian order.
     *
     * @param pointer - beginning of trace array
     * @return - int beginning at pointer in trace array
     * @throws IllegalArgumentException if the four bytes do not lie inside the data
     */
    private int getIntAt(int pointer) {
        checkRange(pointer, 4);
        try {
            DataInputStream dis = new DataInputStream(new ByteArrayInputStream(traceData, pointer, 4));
            return dis.readInt();
        } catch (IOException e) { // This shouldn't happen: the range was checked above.
            throw new IllegalStateException(STREAM_FAILURE, e);
        }
    }

    /**
     * Utility method to read <code>count</code> consecutive big-endian signed shorts
     * beginning at <code>pointer</code> in the TraceData array.
     *
     * @param pointer - position of the first short
     * @param count - number of shorts to read
     * @return - the values read, widened to int
     * @throws IllegalArgumentException if the shorts do not lie inside the data
     */
    private int[] readShorts(int pointer, int count) {
        // Checked before multiplying by two so that the byte count cannot overflow.
        if (count < 0 || count > traceData.length / 2) {
            throw new IllegalArgumentException(
                    "Not a valid ABI file: " + count + " values do not fit in " + traceData.length + " bytes of data.");
        }
        checkRange(pointer, count * 2);
        int[] values = new int[count];
        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(traceData, pointer, count * 2));
        try {
            for (int i = 0; i < count; i++) {
                values[i] = dis.readShort();
            }
        } catch (IOException e) { // This shouldn't happen: the range was checked above.
            throw new IllegalStateException(STREAM_FAILURE, e);
        }
        return values;
    }

    /**
     * A utility method which fills array b with data from the trace starting at traceDataOffset.
     *
     * @param b - trace byte array
     * @param traceDataOffset - starting point
     * @throws IllegalArgumentException if the requested bytes do not lie inside the data
     */
    private void getSubArray(byte[] b, int traceDataOffset) {
        checkRange(traceDataOffset, b.length);
        System.arraycopy(traceData, traceDataOffset, b, 0, b.length);
    }

    /**
     * Verifies that <code>length</code> bytes starting at <code>offset</code> lie inside the data,
     * so that a truncated or corrupt file is reported as an invalid file instead of
     * failing with an index error.
     *
     * @param offset - position of the first byte
     * @param length - number of bytes
     * @throws IllegalArgumentException if the range is not inside the data
     */
    private void checkRange(int offset, int length) {
        if (offset < 0 || length < 0 || offset > traceData.length - length) {
            throw new IllegalArgumentException("Not a valid ABI file: " + length + " byte(s) at offset "
                    + offset + " lie outside the " + traceData.length + " bytes of data.");
        }
    }

    /**
     * Test to see if the file is ABI format by checking to see that the first three bytes
     * are "ABI". Also handle the special case where 128 bytes were prepended to the file
     * due to binary FTP from an older macintosh system; in that case <code>macJunk</code>
     * is set to 128.
     *
     * @return - if format of ABI file is correct
     */
    private boolean isABI() {
        if (startsWithAbiAt(0)) {
            return true;
        }
        if (startsWithAbiAt(MAC_JUNK_LENGTH)) {
            macJunk = MAC_JUNK_LENGTH;
            return true;
        }
        return false;
    }

    /**
     * Tells whether the three bytes at <code>offset</code> are "ABI".
     *
     * @param offset - position to test
     * @return - true if the data is long enough and holds "ABI" at that position
     */
    private boolean startsWithAbiAt(int offset) {
        return traceData.length - 3 >= offset
                && traceData[offset] == 'A'
                && traceData[offset + 1] == 'B'
                && traceData[offset + 2] == 'I';
    }
}