/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/**
 * <code>GenbankFileFormer</code> performs the detailed formatting of
 * Genbank entries for writing to a <code>PrintStream</code>. There is
 * some code duplication with <code>EmblFileFormer</code> which could
 * be factored out.
 *
 * <p>Sequence properties and features are accumulated in internal
 * buffers as they are reported and are only written to the stream
 * when {@link #addSymbols} is called.</p>
 *
 * @author Keith James
 * @since 1.2
 * @deprecated Use org.biojavax.bio.seq.io framework instead
 */
public class GenbankFileFormer extends AbstractGenEmblFileFormer
    implements SeqFileFormer
{
    // Width of the blank leader that precedes feature locations and
    // qualifiers.
    private static final int FEATURE_LEADER_WIDTH = 21;

    // Built programmatically so that the documented width of 21 blanks
    // cannot silently drift from what the code actually uses.
    private static final String FEATURE_LEADER = blanks(FEATURE_LEADER_WIDTH);

    // Maximum width handed to the location/qualifier block formatters
    private static final int FEATURE_LINE_WIDTH = 80;

    // Number of residues printed per sequence line
    private static final int RESIDUES_PER_LINE = 60;

    private PrintStream stream;

    // Main sequence formatting buffer
    private StringBuffer sq = new StringBuffer();
    // Main qualifier formatting buffer
    private StringBuffer qb = new StringBuffer();
    // Utility formatting buffer
    private StringBuffer ub = new StringBuffer();

    // Buffers for each possible sequence property line
    private StringBuffer idb = null;
    private StringBuffer acb = null;
    private StringBuffer deb = null;
    private StringBuffer svb = null;
    private StringBuffer kwb = null;
    private StringBuffer osb = null;
    private StringBuffer ocb = null;
    private StringBuffer ccb = null;
    // Either a single StringBuffer or a List of StringBuffers, one per
    // reference, depending on how the REFERENCE property was supplied
    private Object rfb = null;
    private StringBuffer ftb = new StringBuffer();

    // Locusline buffers
    private StringBuffer typeb = new StringBuffer();
    private StringBuffer strb = new StringBuffer();
    private StringBuffer sizeb = new StringBuffer();
    private StringBuffer circb = new StringBuffer();
    private StringBuffer mdatb = new StringBuffer();
    private StringBuffer divb = new StringBuffer();

    private SymbolTokenization dnaTokenization;

    // vector NTI requires a slightly different flavour of Genbank
    private boolean vecNTISupport = false;

    {
        try {
            dnaTokenization = DNATools.getDNA().getTokenization("token");
        } catch (BioException ex) {
            throw new BioError("Couldn't initialize tokenizer for the DNA alphabet",ex);
        }
    }

    /**
     * Creates a new <code>GenbankFileFormer</code> using
     * <code>System.out</code> stream.
     */
    protected GenbankFileFormer()
    {
        this(System.out);
    }

    /**
     * Creates a new <code>GenbankFileFormer</code> using the
     * specified stream.
     *
     * @param stream a <code>PrintStream</code>.
     */
    protected GenbankFileFormer(PrintStream stream)
    {
        this.stream = stream;
    }

    /**
     * Returns the stream the formatted entry is written to.
     *
     * @return the current <code>PrintStream</code>.
     */
    public PrintStream getPrintStream()
    {
        return stream;
    }

    /**
     * Sets the stream the formatted entry is written to.
     *
     * @param stream a <code>PrintStream</code>.
     */
    public void setPrintStream(PrintStream stream)
    {
        this.stream = stream;
    }

    /**
     * Starts the LOCUS line buffer with the supplied identifier.
     *
     * @param id the sequence name.
     */
    public void setName(String id) throws ParseException
    {
        idb = new StringBuffer("LOCUS " + id);
    }

    public void startSequence() throws ParseException { }

    public void endSequence() throws ParseException { }

    public void setURI(String uri) throws ParseException { }

    /**
     * Writes the complete entry to the stream: the buffered sequence
     * properties, the feature table, the base count summary, the
     * formatted sequence lines and the terminating <code>//</code>.
     *
     * @param alpha  the alphabet of the symbols (not consulted here;
     *               the DNA tokenization is always used).
     * @param syms   the array holding the symbols.
     * @param start  index in <code>syms</code> of the first symbol.
     * @param length number of symbols to write.
     * @throws IllegalAlphabetException if a symbol cannot be tokenized
     *         as DNA.
     */
    public void addSymbols(Alphabet alpha,
                           Symbol [] syms,
                           int start,
                           int length)
        throws IllegalAlphabetException
    {
        try {
            // Count before printing anything so that a tokenization
            // failure is raised before any output has been produced.
            int [] counts = countBases(syms, start, length);

            // FIXME: (kj) shouldn't be printing sequence properties
            // in addSymbols method. If you filter out symbols you
            // lose all sequence properties too.
            printSequenceProperties(length);

            sq.setLength(0);
            sq.append("BASE COUNT ");
            sq.append(counts[0] + " a ");
            sq.append(counts[1] + " c ");
            sq.append(counts[2] + " g ");
            sq.append(counts[3] + " t ");
            sq.append(counts[4] + " others");
            sq.append(nl);
            sq.append("ORIGIN");

            // Print sequence summary header
            stream.println(sq);

            printSequenceLines(syms, start, length);

            // Print end of entry
            stream.println("//");
        } catch (IllegalSymbolException ex) {
            throw new IllegalAlphabetException(ex, "DNA not tokenizing");
        }
    }

    /**
     * Tallies the residues in the given window.
     *
     * @return a five element array holding the counts of a, c, g, t
     *         and everything else, in that order.
     */
    private int [] countBases(Symbol [] syms, int start, int length)
        throws IllegalSymbolException
    {
        int [] counts = new int [5];
        int end = start + length;

        for (int i = start; i < end; i++) {
            char c = dnaTokenization.tokenizeSymbol(syms[i]).charAt(0);

            switch (c) {
                case 'a': case 'A':
                    counts[0]++;
                    break;
                case 'c': case 'C':
                    counts[1]++;
                    break;
                case 'g': case 'G':
                    counts[2]++;
                    break;
                case 't': case 'T':
                    counts[3]++;
                    break;
                default:
                    counts[4]++;
            }
        }

        return counts;
    }

    /**
     * Prints every buffered sequence property line, then the feature
     * table (with its header) if any feature was recorded.
     */
    private void printSequenceProperties(int length)
    {
        // The locus line can only be assembled once a name is known;
        // without one there is nothing to lay the columns out on.
        if (idb != null) {
            locusLineCreator(length);
            stream.println(idb);
        }
        if (acb != null) { stream.println(acb); }
        if (svb != null) { stream.println(svb); }
        if (deb != null) { stream.println(deb); }
        if (kwb != null) { stream.println(kwb); }
        if (osb != null) { stream.println(osb); }
        if (ocb != null) { stream.println(ocb); }
        if (ccb != null) { stream.println(ccb); }

        if (rfb instanceof List) {
            for (Iterator ri = ((List) rfb).iterator(); ri.hasNext();) {
                stream.println(ri.next());
            }
        } else if (rfb != null) {
            stream.println(rfb);
        }

        if (ftb.length() != 0) {
            ftb.insert(0, "FEATURES Location/Qualifiers" + nl);
            stream.print(ftb);
        }
    }

    /**
     * Prints the sequence in lines of up to 60 residues, each prefixed
     * with the right-aligned index of its first residue.
     */
    private void printSequenceLines(Symbol [] syms, int start, int length)
        throws IllegalSymbolException, IllegalAlphabetException
    {
        // Prepare line 80 characters wide, sequence is subset of this
        char [] emptyLine = new char [80];

        for (int offset = 0; offset < length; offset += RESIDUES_PER_LINE) {
            // All lines are 60, except last (if present)
            int len = Math.min(RESIDUES_PER_LINE, length - offset);

            sq.setLength(0);
            ub.setLength(0);

            // Prep the whitespace
            Arrays.fill(emptyLine, ' ');
            sq.append(emptyLine);

            // Get symbols and format into blocks of tokens
            Symbol [] chunk = new Symbol [len];
            System.arraycopy(syms, start + offset, chunk, 0, len);

            String blocks = formatTokenBlock(ub, chunk, 10, dnaTokenization).toString();
            sq.replace(10, blocks.length() + 10, blocks);

            // Running residue count, right-aligned to end at column 9
            String count = Integer.toString(offset + 1);
            sq.replace(9 - count.length(), 9, count);

            // Print formatted sequence line
            stream.println(sq);
        }
    }

    /**
     * Buffers one sequence-level property for later output. Keys that
     * are not recognised are ignored.
     *
     * <p>The reference sub-fields (AUTHORS, TITLE, JOURNAL, PUBMED,
     * MEDLINE) are attached to references previously supplied with
     * the REFERENCE key.</p>
     *
     * @param key   the property key.
     * @param value the property value.
     * @throws ParseException if a reference sub-field is supplied
     *         without a matching REFERENCE to attach it to.
     */
    public void addSequenceProperty(Object key, Object value)
        throws ParseException
    {
        if (key.equals("LOCUS")) {
            // Replaces any earlier name; also works when setName was
            // never called.
            idb = new StringBuffer("LOCUS " + value);
        }
        else if (key.equals("TYPE")) {
            typeb.append(value);
        }
        else if (key.equals("DIVISION")) {
            divb.append(value);
        }
        else if (key.equals("CIRCULAR")) {
            circb.append(value);
        }
        else if (key.equals("DT") || key.equals("MDAT")) {
            if (value instanceof List) {
                // Only the first date is used; an empty list adds nothing
                List dates = (List) value;
                if (!dates.isEmpty()) {
                    mdatb.append(dates.get(0));
                }
            }
            else {
                mdatb.append(value);
            }
        }
        else if (key.equals("DE") || key.equals("DEFINITION")) {
            deb = new StringBuffer(sequenceBufferCreator("DEFINITION ", value));
        }
        else if (key.equals("SV") || key.equals("VERSION")) {
            // If the GI arrived first, slot the version in front of it
            // so that both arrival orders produce the same line.
            int giIndex = (svb == null) ? -1 : svb.indexOf("GI:");
            if (giIndex >= 0) {
                svb.insert(giIndex, value + " ");
            }
            else {
                svb = new StringBuffer("VERSION " + value);
            }
        }
        else if (key.equals("GI")) {
            if (svb != null) {
                svb.append(" GI:" + value);
            }
            else {
                svb = new StringBuffer("VERSION GI:" + value);
            }
        }
        else if (key.equals("KW") || key.equals("KEYWORDS")) {
            kwb = new StringBuffer(sequenceBufferCreator("KEYWORDS ", value));
        }
        else if (key.equals("OS") || key.equals("SOURCE")) {
            osb = new StringBuffer(sequenceBufferCreator("SOURCE ", value));
        }
        else if (key.equals("OC") || key.equals("ORGANISM")) {
            ocb = new StringBuffer(sequenceBufferCreator(" ORGANISM ", value));
        }
        else if (key.equals("CC") || key.equals("COMMENT")) {
            ccb = new StringBuffer(sequenceBufferCreator("COMMENT ", value));
        }
        else if (key.equals(GenbankProcessor.PROPERTY_GENBANK_ACCESSIONS)) {
            ub.setLength(0);
            ub.append("ACCESSION ");
            if (value instanceof List) {
                // Accessions are separated by a single blank
                for (Iterator ai = ((List) value).iterator(); ai.hasNext();) {
                    ub.append(ai.next());
                    if (ai.hasNext()) {
                        ub.append(' ');
                    }
                }
            }
            else {
                ub.append(value);
            }
            acb = new StringBuffer(ub.substring(0));
        }
        // GenBank-style References by RichardH
        // FIXME: (rh) Understand EMBL-style references and ReferenceAnnotation objects here too.
        else if (key.equals("REFERENCE")) {
            if (value instanceof List) {
                List rfbs = new ArrayList();
                for (Iterator ri = ((List) value).iterator(); ri.hasNext();) {
                    rfbs.add(new StringBuffer(sequenceBufferCreator("REFERENCE ", ri.next())));
                }
                rfb = rfbs;
            }
            else {
                rfb = new StringBuffer(sequenceBufferCreator("REFERENCE ", value));
            }
        }
        else if (key.equals("AUTHORS")) {
            appendReferenceField(" AUTHORS ", value);
        }
        else if (key.equals("TITLE")) {
            appendReferenceField(" TITLE ", value);
        }
        else if (key.equals("JOURNAL")) {
            appendReferenceField(" JOURNAL ", value);
        }
        else if (key.equals("PUBMED")) {
            appendReferenceField(" PUBMED ", value);
        }
        else if (key.equals("MEDLINE")) {
            appendReferenceField(" MEDLINE ", value);
        }
    }

    /**
     * Appends a reference sub-field to the buffered references. A
     * list value is distributed over the references in order (first
     * value to first reference, and so on); any other value is
     * attached to the first reference.
     *
     * @param label the sub-field label, including its surrounding
     *              blanks.
     * @param value the sub-field value or a list of values.
     * @throws ParseException if there is no reference to attach to, or
     *         more values than references were supplied.
     */
    private void appendReferenceField(String label, Object value)
        throws ParseException
    {
        // View the reference storage uniformly as a list of buffers
        List references;
        if (rfb instanceof List) {
            references = (List) rfb;
        }
        else {
            references = new ArrayList(1);
            if (rfb != null) {
                references.add(rfb);
            }
        }

        if (references.isEmpty()) {
            throw new ParseException("Cannot add" + label
                                     + "field: no REFERENCE has been set");
        }

        if (value instanceof List) {
            List values = (List) value;

            // Checked up front so that a mismatch leaves the buffered
            // references untouched.
            if (values.size() > references.size()) {
                throw new ParseException("Cannot add" + label + "fields: "
                                         + values.size() + " values for "
                                         + references.size() + " references");
            }

            Iterator ri = references.iterator();
            for (Iterator vi = values.iterator(); vi.hasNext();) {
                StringBuffer reference = (StringBuffer) ri.next();
                reference.append("\n" + sequenceBufferCreator(label, vi.next()));
            }
        }
        else {
            StringBuffer first = (StringBuffer) references.get(0);
            first.append("\n" + sequenceBufferCreator(label, value));
        }
    }

    /**
     * Buffers the location line of a new feature: the leader with the
     * feature type written in at column 5, followed by the formatted
     * location.
     *
     * @param templ the template describing the feature.
     */
    public void startFeature(Feature.Template templ)
        throws ParseException
    {
        int strand = 0;

        if (templ instanceof StrandedFeature.Template) {
            strand = ((StrandedFeature.Template) templ).strand.getValue();
        }

        ub.setLength(0);
        ub.append(FEATURE_LEADER);

        StringBuffer lb = formatLocationBlock(ub,
                                              templ.location,
                                              strand,
                                              FEATURE_LEADER,
                                              FEATURE_LINE_WIDTH);

        // Overwrite part of the blank leader with the feature type
        lb.replace(5, 5 + templ.type.length(), templ.type);

        ftb.append(lb + nl);
    }

    public void endFeature() throws ParseException { }

    /**
     * Buffers one feature qualifier. A collection value produces one
     * qualifier line block per element.
     *
     * @param key   the qualifier name.
     * @param value the qualifier value, or a collection of values.
     */
    public void addFeatureProperty(Object key, Object value)
        throws ParseException
    {
        // Don't print internal data structures
        if (key.equals(Feature.PROPERTY_DATA_KEY)) {
            return;
        }

        // The value may be a collection if several qualifiers of the
        // same type are present in a feature
        if (value instanceof Collection) {
            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();) {
                appendQualifier(key, vi.next());
            }
        }
        else {
            appendQualifier(key, value);
        }
    }

    /**
     * Formats a single qualifier and appends it to the feature table
     * buffer.
     */
    private void appendQualifier(Object key, Object value)
        throws ParseException
    {
        qb.setLength(0);
        ub.setLength(0);
        StringBuffer fb = formatQualifierBlock(qb,
                                               formatQualifier(ub, key, value).substring(0),
                                               FEATURE_LEADER,
                                               FEATURE_LINE_WIDTH);
        ftb.append(fb + nl);
    }

    /**
     * VectorNTI requires GenBank format to be a little more specific than
     * required by the GenBank definition. By setting this to true the produced
     * output should be parsable by VectorNTI. By default this is false.
     *
     * @param b to support or not to support.
     */
    public void setVectorNTISupport(boolean b){
        vecNTISupport = b;
    }

    /**
     * Is VectorNTI compatible output being produced?
     * @return false by default.
     */
    public boolean getVectorNTISupport(){
        return vecNTISupport;
    }

    /**
     * Builds the text of a keyword line. A <code>null</code> value or
     * an empty list yields just the key; a list yields one line per
     * element; any other value is split on blanks and wrapped.
     *
     * @param key   the line keyword, including trailing padding.
     * @param value the value to format.
     * @return the formatted line(s).
     */
    private String sequenceBufferCreator(Object key, Object value) {
        StringBuffer temp = new StringBuffer();

        if (value == null) {
            temp.append(key.toString());
        }
        else if (value instanceof List) {
            Iterator iter = ((List) value).iterator();
            if (!iter.hasNext()) {
                // Nothing to list: behave as for a missing value
                temp.append(key.toString());
            }
            else {
                temp.append(key.toString() + " " + iter.next());
                while (iter.hasNext()) {
                    if (vecNTISupport) {
                        // VectorNTI flavour repeats the key on every line
                        temp.append(nl + key.toString() +" " + iter.next());
                    }
                    else {
                        temp.append(nl + " " + iter.next());
                    }
                }
            }
        }
        else {
            StringTokenizer valueToke = new StringTokenizer(value.toString(), " ");
            int fullline = 80;
            int length = 0;
            temp.append(key.toString());
            if (valueToke.hasMoreTokens()) {
                String token = valueToke.nextToken();

                while (true) {
                    length = projectedLength(temp, token, fullline);
                    // Keep adding tokens while the next one still fits
                    while (length <= fullline && valueToke.hasMoreTokens()) {
                        temp.append(" " + token);
                        token = valueToke.nextToken();
                        length = projectedLength(temp, token, fullline);
                    }
                    if (valueToke.hasMoreTokens()) {
                        // Pad out the current line, then start a new one
                        for (int i = length - token.length(); i < fullline; i++) {
                            temp.append(" ");
                        }
                        temp.append(nl + " ");
                    }
                    else if (length <= fullline) {
                        temp.append(" " + token);
                        break;
                    }
                    else {
                        temp.append(nl);
                        temp.append(" " + token);
                        break;
                    }
                }
            }
            else {
                temp.append(" ");
            }
        }

        return temp.substring(0);
    }

    /**
     * Computes the line length that would result from adding
     * <code>token</code> (plus a separating blank) to the buffer,
     * using the buffer length modulo <code>fullline + 1</code> as the
     * current column. A buffer sitting exactly on that boundary is
     * reported as already over-long.
     */
    private int projectedLength(StringBuffer temp, String token, int fullline) {
        int column = temp.length() % (fullline + 1);
        if (column == 0) {
            return 81 + token.length();
        }
        return column + token.length() + 1;
    }

    /**
     * Right-pads the buffer with blanks up to <code>length</code>
     * characters. Longer buffers are returned unchanged.
     */
    private StringBuffer fixLength(StringBuffer temp, int length) {
        // FIXME: (kj) check performance
        while (temp.length() < length) {
            temp.append(" ");
        }
        return temp;
    }

    /**
     * Lays out the LOCUS line in <code>idb</code> by padding each
     * locus-line buffer to its column width and inserting it at its
     * column, then truncating the result to 79 characters.
     *
     * NOTE(review): this mutates the locus-line buffers in place, so
     * it appears to assume it runs once per instance - confirm.
     *
     * @param size the sequence length in residues.
     */
    private void locusLineCreator(int size) {
        idb = fixLength(idb, 30);
        typeb = fixLength(typeb, 8);

        // Right-align the size in a 12 character column
        sizeb.insert(0, size);
        while (sizeb.length() < 12) { sizeb.insert(0, " "); }
        sizeb.append(" bp ");

        if (strb.length() > 0) {
            strb.append("-");
        }
        strb = fixLength(strb, 3);
        circb = fixLength(circb, 9);
        mdatb = fixLength(mdatb, 11);
        divb = fixLength(divb, 4);
        idb.insert(29, sizeb);
        idb.insert(44, strb);
        idb.insert(47, typeb);
        idb.insert(55, circb);
        idb.insert(64, divb);
        idb.insert(68, mdatb);
        idb.setLength(79);
    }

    /**
     * Returns a string consisting of <code>count</code> blanks.
     */
    private static String blanks(int count) {
        char [] pad = new char [count];
        Arrays.fill(pad, ' ');
        return new String(pad);
    }
}