/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.FuzzyLocation;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.PointLocation;
import org.biojava.bio.symbol.RangeLocation;
import org.biojava.bio.symbol.Symbol;

/**
 * Formats a sequence into Swissprot/TrEMBL format. Modeled after
 * EmblFileFormer.
 *
 * <p>Sequence properties and features are buffered as they arrive and
 * are written, followed by the symbols, when <code>addSymbols</code> is
 * called. The buffers are cleared again in <code>endSequence</code>.</p>
 *
 * @author Greg Cox
 * @since 1.2
 * @deprecated Use org.biojavax.bio.seq.io framework instead
 */
public class SwissprotFileFormer extends AbstractGenEmblFileFormer
    implements SeqFileFormer
{
    // NOTE(review): apart from the FT leader (whose width is stated by the
    // code's own comments) the spacing inside the line-prefix literals below
    // is kept exactly as found. Confirm the column alignment against the
    // Swissprot format specification.

    // Maximum width of a formatted line
    private static final int LINE_WIDTH = 80;
    // Layout of a formatted sequence line: blocks of ten, six to a line
    private static final int SYMBOLS_PER_BLOCK = 10;
    private static final int BLOCKS_PER_LINE = 6;
    // Offset within an FT line at which the feature key starts
    private static final int FEATURE_KEY_OFFSET = 5;
    // There are 19 spaces in the FT leader
    private static final int FEATURE_LEADER_PADDING = 19;
    private static final String FEATURE_LEADER = buildFeatureLeader();

    // Main qualifier formatting buffer
    private StringBuffer qb = new StringBuffer();
    // Utility formatting buffer
    private StringBuffer ub = new StringBuffer();

    // Buffers for each possible sequence property line
    private StringBuffer idb = null;
    private StringBuffer acb = null;
    private StringBuffer dtb = null;
    private StringBuffer deb = null;
    private StringBuffer svb = null;
    private StringBuffer kwb = null;
    private StringBuffer osb = null;
    private StringBuffer ocb = null;
    private StringBuffer ccb = null;
    private StringBuffer ftb = new StringBuffer();

    // Static variables
    static int LOCATION_WIDTH = 6;

    // Member variables
    PrintStream mStream;

    // Constructors and initialization

    /**
     * Creates a new <code>SwissprotFileFormer</code> using
     * <code>System.out</code> stream.
     */
    protected SwissprotFileFormer()
    {
        super();
        this.mStream = System.out;
    }

    /**
     * Creates a new <code>SwissprotFileFormer</code> using the
     * specified stream.
     *
     * @param theStream a <code>PrintStream</code> object.
     */
    protected SwissprotFileFormer(PrintStream theStream)
    {
        super();
        this.mStream = theStream;
    }

    // Interface implementations
    // SeqIOListener methods

    /**
     * Start the processing of a sequence. This method exists primarily
     * to enforce the life-cycles of SeqIOListener objects.
     */
    public void startSequence() throws ParseException
    {
    }

    /**
     * Notify the listener that processing of the sequence is complete.
     * All buffered property and feature lines are discarded so that a
     * former which is reused for another sequence does not repeat them.
     */
    public void endSequence() throws ParseException
    {
        idb = null;
        acb = null;
        dtb = null;
        deb = null;
        svb = null;
        kwb = null;
        osb = null;
        ocb = null;
        ccb = null;
        ftb.setLength(0);
    }

    /**
     * The name is printed out as part of the identifier line. It will be
     * replaced if an ID keyword exists in the annotations.
     *
     * @param theName the String that should be returned by getName for the
     * sequence being parsed
     */
    public void setName(String theName) throws ParseException
    {
        idb = new StringBuffer("ID " + theName);
    }

    /**
     * Null implementation. This object formats and prints a sequence. The
     * URI alone cannot be printed in Swissprot format. Therefore, it's
     * easiest to ignore it.
     *
     * @param theURI the new URI of the sequence
     */
    public void setURI(String theURI) throws ParseException
    {
    }

    /**
     * Prints out the buffered sequence properties in order (ID, AC, SV, DT,
     * DE, KW, OS, OC, CC, then the feature table), followed by the sequence
     * header line, the symbols in lines of 60 split into blocks of 10, and
     * the record terminator.
     *
     * <p>The symbols are validated and tokenized before anything is
     * written, so an illegal symbol does not leave a truncated record on
     * the stream.</p>
     *
     * @param theAlphabet The alphabet of the symbol data
     * @param theSymbols An array containing symbols
     * @param theStart The start offset of valid data within the array
     * @param theLength The number of valid symbols in the array
     *
     * @throws IllegalAlphabetException if we can't cope with this
     * alphabet.
     */
    public void addSymbols(Alphabet theAlphabet,
                           Symbol[] theSymbols,
                           int theStart,
                           int theLength)
        throws IllegalAlphabetException
    {
        PrintStream stream = this.getPrintStream();

        // Fail before writing if the symbols cannot be formatted
        List brokenLines = this.breakSymbolArray(theAlphabet, theSymbols,
                                                 theStart, theLength);

        // Print out all of the sequence properties in order
        printProperty(stream, idb, true);
        printProperty(stream, acb, true);
        printProperty(stream, svb, true);
        printProperty(stream, dtb, true);
        printProperty(stream, deb, true);
        printProperty(stream, kwb, true);
        // OS and OC share one separator line
        printProperty(stream, osb, false);
        printProperty(stream, ocb, true);
        printProperty(stream, ccb, true);
        if (ftb.length() != 0)
        {
            stream.print(ftb);
        }

        this.printOutSequenceHeaderLine(theAlphabet, theSymbols, theStart, theLength);

        String leader = " ";
        for (Iterator li = brokenLines.iterator(); li.hasNext();)
        {
            stream.print(leader + li.next() + nl);
        }
        stream.println("//");
    }

    /**
     * Prints one buffered property line, optionally followed by an
     * <code>XX</code> separator line. Does nothing if the property was
     * never set.
     *
     * @param stream the stream to print to
     * @param line the buffered line, or <code>null</code>
     * @param separator whether to print the separator line afterwards
     */
    private static void printProperty(PrintStream stream,
                                      StringBuffer line,
                                      boolean separator)
    {
        if (line == null)
        {
            return;
        }
        stream.println(line);
        if (separator)
        {
            stream.println("XX");
        }
    }

    /**
     * Formats sequence properties into form suitable for printing to
     * file.
     *
     * <ul>
     * <li>A <code>null</code> or blank value yields the key alone.</li>
     * <li>A collection yields one line per element.</li>
     * <li>Any other value is converted to a string, split on spaces and
     * wrapped at 80 columns, each line starting with the key. Wrapped
     * lines that are followed by further tokens are padded with trailing
     * spaces to 79 columns. A token that is too long to fit is placed on
     * a line of its own.</li>
     * </ul>
     *
     * @param key The key of the sequence property
     * @param value The value of the sequence property
     *
     * @return Properly formatted string
     */
    private String sequenceBufferCreator(Object key, Object value)
    {
        String tag = String.valueOf(key);

        if (value == null)
        {
            return tag;
        }

        StringBuffer out = new StringBuffer();

        if (value instanceof Collection)
        {
            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
            {
                out.append(tag).append(" ").append(vi.next());
                if (vi.hasNext())
                {
                    out.append(nl);
                }
            }
            return out.toString();
        }

        StringTokenizer tokens = new StringTokenizer(String.valueOf(value), " ");
        if (!tokens.hasMoreTokens())
        {
            // Nothing to wrap: treat like a missing value
            return tag;
        }

        // Offset in 'out' at which the current line begins. Tracking it
        // explicitly keeps the column count independent of the length of
        // the line separator.
        int lineStart = 0;
        String token = tokens.nextToken();
        while (true)
        {
            out.append(tag).append(" ");
            boolean lineHasToken = false;

            while (true)
            {
                int projected = (out.length() - lineStart) + 1 + token.length();
                // Always place at least one token per line, otherwise an
                // over-long token could never be consumed.
                if (projected > LINE_WIDTH && lineHasToken)
                {
                    break;
                }
                out.append(" ").append(token);
                lineHasToken = true;
                if (!tokens.hasMoreTokens())
                {
                    return out.toString();
                }
                token = tokens.nextToken();
            }

            // 'token' did not fit and moves to the next line. Lines are only
            // padded when more tokens follow the pending one.
            if (tokens.hasMoreTokens())
            {
                while (out.length() - lineStart < LINE_WIDTH - 1)
                {
                    out.append(" ");
                }
            }
            out.append(nl);
            lineStart = out.length();
        }
    }

    /**
     * Notify the listener of a sequence-wide property. This might
     * be stored as an entry in the sequence's annotation bundle.
     * Checks for possible known properties to be shown in the file;
     * unknown keys are ignored.
     *
     * @param key Key the property will be stored under
     * @param value Value stored under the key
     */
    public void addSequenceProperty(Object key, Object value) throws ParseException
    {
        if ("ID".equals(key))
        {
            // setName may not have been called yet
            if (idb == null)
            {
                idb = new StringBuffer();
            }
            idb.setLength(0);
            idb.append("ID " + value);
        }
        else if (isKey(key, "DT", "MDAT"))
        {
            dtb = new StringBuffer(sequenceBufferCreator("DT", value));
        }
        else if (isKey(key, "DE", "DEFINITION"))
        {
            deb = new StringBuffer(sequenceBufferCreator("DE", value));
        }
        else if (isKey(key, "SV", "VERSION"))
        {
            svb = new StringBuffer(sequenceBufferCreator("SV", value));
        }
        else if (isKey(key, "KW", "KEYWORDS"))
        {
            kwb = new StringBuffer(sequenceBufferCreator("KW", value));
        }
        else if (isKey(key, "OS", "SOURCE"))
        {
            osb = new StringBuffer(sequenceBufferCreator("OS", value));
        }
        else if (isKey(key, "OC", "ORGANISM"))
        {
            ocb = new StringBuffer(sequenceBufferCreator("OC", value));
        }
        else if (isKey(key, "CC", "COMMENT"))
        {
            ccb = new StringBuffer(sequenceBufferCreator("CC", value));
        }
        else if (SwissprotProcessor.PROPERTY_SWISSPROT_ACCESSIONS.equals(key))
        {
            acb = new StringBuffer();
            acb.append("AC ");
            if (value instanceof Collection)
            {
                for (Iterator ai = ((Collection) value).iterator(); ai.hasNext();)
                {
                    acb.append(ai.next());
                    acb.append(";");
                }
            }
            else if (value != null)
            {
                // A single accession supplied without a wrapping list
                acb.append(value);
                acb.append(";");
            }
        }
    }

    /**
     * Tests whether a property key is one of the two names a line type is
     * known by.
     *
     * @param key the key to test, may be <code>null</code>
     * @param shortName the two letter line code
     * @param longName the alternative long name
     * @return <code>true</code> if the key equals either name
     */
    private static boolean isKey(Object key, String shortName, String longName)
    {
        return shortName.equals(key) || longName.equals(key);
    }

    /**
     * Buffers the feature table line for a new feature: the FT leader with
     * the feature type written into it starting at offset 5, followed by
     * the formatted location.
     *
     * @param templ The template for this new feature object
     */
    public void startFeature(Feature.Template templ) throws ParseException
    {
        ub.setLength(0);
        ub.append(FEATURE_LEADER.substring(0, FEATURE_KEY_OFFSET));
        ub.append(templ.type);
        // Pad out to the leader width, keeping at least one space between
        // the type and the location
        do
        {
            ub.append(' ');
        }
        while (ub.length() < FEATURE_LEADER.length());

        // formatLocation returns only the location text, so it is appended
        // here rather than relying on the buffer argument being filled
        ub.append(formatLocation(new StringBuffer(), templ.location));
        ftb.append(ub).append(nl);
    }

    /**
     * Null implementation.
     */
    public void endFeature() throws ParseException
    {
    }

    /**
     * Buffers a feature qualifier as one or more FT lines. The internal
     * <code>Feature.PROPERTY_DATA_KEY</code> property is skipped. A
     * collection value produces one qualifier block per element.
     *
     * @param key Key the property will be stored under
     * @param value Value stored under the key
     */
    public void addFeatureProperty(Object key, Object value) throws ParseException
    {
        // Don't print internal data structures
        if (Feature.PROPERTY_DATA_KEY.equals(key))
        {
            return;
        }

        // The value may be a collection if several qualifiers of the
        // same type are present in a feature
        if (value instanceof Collection)
        {
            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
            {
                appendQualifier(key, vi.next());
            }
        }
        else
        {
            appendQualifier(key, value);
        }
    }

    /**
     * Formats a single qualifier and appends it to the feature table
     * buffer.
     *
     * @param key the qualifier key
     * @param value the qualifier value
     */
    private void appendQualifier(Object key, Object value)
    {
        qb.setLength(0);
        ub.setLength(0);
        StringBuffer fb = formatQualifierBlock(qb,
                                               formatQualifier(ub, key, value).toString(),
                                               FEATURE_LEADER,
                                               LINE_WIDTH);
        ftb.append(fb).append(nl);
    }

    /**
     * Builds the leader of a feature table line: <code>FT</code> followed
     * by 19 spaces.
     *
     * @return the leader string
     */
    private static String buildFeatureLeader()
    {
        StringBuffer leader = new StringBuffer("FT");
        for (int i = 0; i < FEATURE_LEADER_PADDING; i++)
        {
            leader.append(' ');
        }
        return leader.toString();
    }

    // SeqFileFormer methods
    /**
     * <code>getPrintStream</code> returns the
     * <code>PrintStream</code> to which an instance of SwissprotFileFormer
     * will write the formatted data. The default is System.out
     *
     * @return the <code>PrintStream</code> which will be written to.
     */
    public PrintStream getPrintStream()
    {
        return this.mStream;
    }

    /**
     * <code>setPrintStream</code> informs an instance which
     * <code>PrintStream</code> to use.
     *
     * @param theStream a <code>PrintStream</code> to write to.
     */
    public void setPrintStream(PrintStream theStream)
    {
        this.mStream = theStream;
    }

    /**
     * <code>formatLocation</code> creates a String representation of
     * a <code>Location</code>. Strand information is ignored, as Swissprot
     * files represent proteins. An alternative form of this function does
     * not take a Strand; that form is available only on SwissprotFileFormer;
     * it is not part of the SeqFileFormer interface.
     *
     * @param theBuffer a <code>StringBuffer</code>; passed on unchanged to
     * the two argument form.
     * @param theLocation a <code>Location</code> to format.
     * @param theStrand a <code>StrandedFeature.Strand</code> indicating
     * nothing of relevance
     *
     * @return the result of the two argument form.
     */
    public StringBuffer formatLocation(StringBuffer theBuffer,
                                       Location theLocation,
                                       StrandedFeature.Strand theStrand)
    {
        return this.formatLocation(theBuffer, theLocation);
    }

    /**
     * Creates a string representation of the location of a feature
     *
     * @param theFeature The feature with the location to format
     * @return String The formatted location
     */
    public String formatLocation(Feature theFeature)
    {
        return this.formatLocation(new StringBuffer(), theFeature.getLocation()).toString();
    }

    // Public methods
    /**
     * <code>formatLocation</code> creates a String representation of
     * a <code>Location</code>. The stringbuffer returned represents columns
     * 15-27 of the Swissprot feature table entry. An alternative form of
     * this function takes a Strand; that form is part of the SeqFileFormer
     * interface.
     *
     * <p>Point, range and fuzzy locations are supported. For any other
     * kind of location both end points are left empty.</p>
     *
     * @param theBuffer a <code>StringBuffer</code>; it is neither read nor
     * modified by this implementation.
     * @param theLocation a <code>Location</code> to format.
     *
     * @return a new <code>StringBuffer</code> holding the formatted start
     * point, a space and the formatted end point.
     */
    public StringBuffer formatLocation(StringBuffer theBuffer,
                                       Location theLocation)
    {
        // Five Location cases, each treated separately:
        //   Point Location: "     5      5"
        //   Range Location: "     5     10"
        //   Fuzzy Location: "    <5     10"
        //   Fuzzy Location: "     ?     10"
        //   Fuzzy Location: "   ?24     35" (Not in the current
        //   specification, but used anyway)
        StringBuffer startPoint = new StringBuffer(LOCATION_WIDTH);
        StringBuffer endPoint = new StringBuffer(LOCATION_WIDTH);

        if ((theLocation instanceof PointLocation)
            || (theLocation instanceof RangeLocation))
        {
            startPoint = formatPoint(theLocation.getMin(), theLocation.getMin(), false);
            endPoint = formatPoint(theLocation.getMax(), theLocation.getMax(), false);
        }
        else if (theLocation instanceof FuzzyLocation)
        {
            // All fuzzy variants are told apart inside formatPoint
            FuzzyLocation fuzzy = (FuzzyLocation) theLocation;
            startPoint = this.formatPoint(fuzzy.getOuterMin(),
                                          fuzzy.getInnerMin(),
                                          fuzzy.isMinFuzzy());
            endPoint = this.formatPoint(fuzzy.getInnerMax(),
                                        fuzzy.getOuterMax(),
                                        fuzzy.isMaxFuzzy());
        }

        return new StringBuffer(startPoint.toString() + " " + endPoint.toString());
    }

    // Protected methods
    /**
     * Prints out sequence header with only length data.
     *
     * @param theAlphabet The alphabet of the symbol data
     * @param theSymbols An array containing symbols
     * @param theStart The start offset of valid data within the array
     * @param theLength The number of valid symbols in the array
     *
     * @throws IllegalAlphabetException if we can't cope with this
     * alphabet.
     */
    protected void printOutSequenceHeaderLine(Alphabet theAlphabet,
                                              Symbol[] theSymbols,
                                              int theStart,
                                              int theLength)
        throws IllegalAlphabetException
    {
        this.getPrintStream().println("SQ SEQUENCE " + theLength + " AA; ");
    }

    /**
     * Converts the symbol list passed in into a list of strings. The
     * strings will be blocks of ten separated by single spaces, with six
     * blocks on a line. No line carries a trailing space.
     *
     * @param theAlphabet The alphabet of the symbol data
     * @param theSymbols An array containing symbols
     * @param theStart The start offset of valid data within the array
     * @param theLength The number of valid symbols in the array
     * @return The symbol list passed in broken into blocks of ten
     * characters, six to a string.
     *
     * @throws IllegalAlphabetException if we can't cope with this
     * alphabet.
     */
    protected List breakSymbolArray(Alphabet theAlphabet,
                                    Symbol[] theSymbols,
                                    int theStart,
                                    int theLength)
        throws IllegalAlphabetException
    {
        final int symbolsPerLine = SYMBOLS_PER_BLOCK * BLOCKS_PER_LINE;

        SymbolTokenization tokenization;
        try
        {
            tokenization = theAlphabet.getTokenization("token");
        }
        catch (Exception ex)
        {
            throw new IllegalAlphabetException(ex, "Couldn't get tokenization for this alphabet");
        }

        List returnList = new ArrayList(Math.max(theLength, 0) / symbolsPerLine + 1);
        StringBuffer line = new StringBuffer();
        int symbolsOnLine = 0;

        for (int i = theStart; i < theStart + theLength; i++)
        {
            try
            {
                theAlphabet.validate(theSymbols[i]);
            }
            catch (IllegalSymbolException e)
            {
                throw new IllegalAlphabetException(e);
            }

            if (symbolsOnLine == symbolsPerLine)
            {
                // Six completed blocks: start a new line
                returnList.add(line.toString());
                line.setLength(0);
                symbolsOnLine = 0;
            }
            else if (symbolsOnLine > 0 && symbolsOnLine % SYMBOLS_PER_BLOCK == 0)
            {
                // Separate completed blocks within a line
                line.append(' ');
            }

            try
            {
                line.append(tokenization.tokenizeSymbol(theSymbols[i]));
            }
            catch (IllegalSymbolException ex)
            {
                throw new IllegalAlphabetException(ex, "Couldn't tokenize symbols");
            }
            symbolsOnLine++;
        }

        // Add the last line on
        if (line.length() != 0)
        {
            returnList.add(line.toString());
        }
        return returnList;
    }

    /**
     * Simple method that adds spaces onto the buffer passed in. This method
     * exists to refactor some code used in location formatting. It isn't
     * intended to be generally used.
     *
     * @param theBuffer Buffer to append whitespace to.
     * @param theLength Amount of whitespace to append; nothing is appended
     * if this is zero or negative.
     */
    protected void fillBuffer(StringBuffer theBuffer, int theLength)
    {
        for (int i = 0; i < theLength; i++)
        {
            theBuffer.append(' ');
        }
    }

    /**
     * Formats one end point of a location, right aligned in a field of
     * <code>LOCATION_WIDTH</code> characters. For a fuzzy location this is
     * called as
     * <code>formatPoint(getOuterMin(), getInnerMin(), isMinFuzzy())</code>
     * for the start and
     * <code>formatPoint(getInnerMax(), getOuterMax(), isMaxFuzzy())</code>
     * for the end.
     *
     * <p>A point that is not fuzzy is printed as <code>theMinIndex</code>.
     * A fuzzy point is printed as <code>?</code> when both indices are
     * unbounded, <code>&lt;n</code> when only the lower one is,
     * <code>&gt;n</code> when only the upper one is, and <code>?n</code>
     * when both indices are equal. Any other fuzzy point cannot be
     * expressed in Swissprot format: a diagnostic is written to
     * <code>System.err</code> and an empty buffer is returned.</p>
     *
     * @param theMinIndex Lower index of the point
     * @param theMaxIndex Upper index of the point
     * @param isFuzzy Indicates if this point is fuzzy
     * @return a new buffer holding the formatted point
     */
    protected StringBuffer formatPoint(int theMinIndex, int theMaxIndex, boolean isFuzzy)
    {
        StringBuffer bufferToReturn = new StringBuffer(LOCATION_WIDTH);

        if (!isFuzzy)
        {
            appendRightAligned(bufferToReturn, Integer.toString(theMinIndex));
            return bufferToReturn;
        }

        boolean lowerUnbounded = (theMinIndex == Integer.MIN_VALUE);
        boolean upperUnbounded = (theMaxIndex == Integer.MAX_VALUE);

        if (lowerUnbounded && upperUnbounded)
        {
            // MIN_VALUE to MAX_VALUE is the ? location regardless of which
            // end is which
            appendRightAligned(bufferToReturn, "?");
        }
        else if (lowerUnbounded)
        {
            // If the outer index is MIN_VALUE, that's <n
            appendRightAligned(bufferToReturn, "<" + theMaxIndex);
        }
        else if (upperUnbounded)
        {
            // If the outer index is MAX_VALUE, that's >n
            appendRightAligned(bufferToReturn, ">" + theMinIndex);
        }
        else if (theMinIndex == theMaxIndex)
        {
            // The only swissprot location left is ?nn
            appendRightAligned(bufferToReturn, "?" + theMinIndex);
        }
        else
        {
            // The location cannot be formatted in Swissprot format.
            // Reported on System.err because System.out is the default
            // destination of the formatted record itself.
            // Revisit
            System.err.println("Error in formatPoint");
            System.err.println("\tInner: " + theMinIndex);
            System.err.println("\tOuter: " + theMaxIndex);
            System.err.println("\tFuzzy: " + isFuzzy);
        }
        return bufferToReturn;
    }

    // Private methods
    /**
     * Appends text to a buffer, preceded by enough spaces to bring it up to
     * <code>LOCATION_WIDTH</code> characters. Text that is already at least
     * that wide is appended without padding.
     *
     * @param theBuffer Buffer to append to.
     * @param theText Text to right align.
     */
    private void appendRightAligned(StringBuffer theBuffer, String theText)
    {
        this.fillBuffer(theBuffer, LOCATION_WIDTH - theText.length());
        theBuffer.append(theText);
    }
}