001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021/* 022 * Created on 2005-08-01 023 */ 024 025package org.biojava.bio.alignment; 026 027import java.io.BufferedReader; 028import java.io.File; 029import java.io.FileReader; 030import java.io.IOException; 031import java.io.InputStreamReader; 032import java.io.Reader; 033import java.io.Serializable; 034import java.io.StringReader; 035import java.util.HashMap; 036import java.util.Iterator; 037import java.util.Map; 038import java.util.NoSuchElementException; 039import java.util.StringTokenizer; 040 041import org.biojava.bio.BioException; 042import org.biojava.bio.seq.DNATools; 043import org.biojava.bio.seq.ProteinTools; 044import org.biojava.bio.seq.io.SymbolTokenization; 045import org.biojava.bio.symbol.AlphabetManager; 046import org.biojava.bio.symbol.FiniteAlphabet; 047import org.biojava.bio.symbol.IllegalSymbolException; 048import org.biojava.bio.symbol.Symbol; 049 050/** 051 * <p> 052 * This object is able to read a substitution matrix file and constructs a short 053 * matrix in memory. Every single element of the matrix can be accessed by the 054 * method <code>getValueAt</code> with the parameters being two BioJava symbols. 055 * This is why it is not necessary to access the matrix directly. If there is no 056 * value for the two specified <code>Symbol</code>s an <code>Exception</code> is 057 * thrown. 058 * </p> 059 * <p> 060 * Substitution matrix files, are available at <a 061 * href="ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/"> the NCBI FTP 062 * directory</a>. 063 * </p> 064 * 065 * @author Andreas Dräger <andreas.draeger@uni-tuebingen.de> 066 */ 067public class SubstitutionMatrix implements Serializable { 068 069 /** 070 * 071 */ 072 private Map<Symbol, Integer> rowSymbols, colSymbols; 073 074 /** 075 * 076 */ 077 private short[][] matrix; 078 079 /** 080 * Minimal and maximal entry in this matrix 081 */ 082 private short min, max; 083 084 /** 085 * The alphabet used by this matrix. 086 */ 087 private FiniteAlphabet alphabet; 088 089 /** 090 * Name and description of this matrix. 091 */ 092 private String description, name; 093 094 /** 095 * Just the new line symbol of the system. 096 */ 097 private static final String newLine = System.getProperty("line.separator"); 098 099 /** 100 * This constructs a <code>SubstitutionMatrix</code> object that contains 101 * two <code>Map</code> data structures having BioJava symbols as keys and 102 * the value being the index of the matrix containing the substitution 103 * score. 104 * 105 * @param alpha 106 * the alphabet of the matrix (e.g., DNA, RNA or PROTEIN, or 107 * PROTEIN-TERM) 108 * @param matrixFile 109 * the file containing the substitution matrix. Lines starting 110 * with '<code>#</code>' are comments. The line starting with a 111 * white space, is the table head. Every line has to start with 112 * the one letter representation of the Symbol and then the 113 * values for the exchange. 114 * @throws IOException 115 * @throws BioException 116 * @throws NumberFormatException 117 */ 118 public SubstitutionMatrix(FiniteAlphabet alpha, File matrixFile) 119 throws BioException, NumberFormatException, IOException { 120 this.alphabet = alpha; 121 this.description = ""; 122 this.name = matrixFile.getName(); 123 this.rowSymbols = new HashMap<Symbol, Integer>(); 124 this.colSymbols = new HashMap<Symbol, Integer>(); 125 this.matrix = this.parseMatrix(matrixFile); 126 } 127 128 /** 129 * With this constructor it is possible to construct a SubstitutionMatrix 130 * object from a substitution matrix file. The given String contains a 131 * number of lines separated by 132 * <code>System.getProperty("line.separator")</code>. Everything else is the 133 * same than for the constructor above. 134 * 135 * @param alpha 136 * The <code>FiniteAlphabet</code> to use 137 * @param matrixString 138 * @param name 139 * of the matrix. 140 * @throws BioException 141 * @throws IOException 142 * @throws NumberFormatException 143 */ 144 public SubstitutionMatrix(FiniteAlphabet alpha, String matrixString, 145 String name) throws BioException, NumberFormatException, 146 IOException { 147 this.alphabet = alpha; 148 this.description = ""; 149 this.name = name; 150 this.rowSymbols = new HashMap<Symbol, Integer>(); 151 this.colSymbols = new HashMap<Symbol, Integer>(); 152 this.matrix = this.parseMatrix(matrixString); 153 // this.printMatrix(); 154 } 155 156 /** 157 * Constructs a SubstitutionMatrix with every Match and every Replace having 158 * the same expenses given by the parameters. Ambiguous symbols are not 159 * considered because there might be to many of them (for proteins). 160 * 161 * @param alpha 162 * @param match 163 * @param replace 164 */ 165 public SubstitutionMatrix(FiniteAlphabet alpha, short match, short replace) { 166 int i = 0, j = 0; 167 168 this.alphabet = alpha; 169 this.description = "Identity matrix. All replaces and all matches are treated equally."; 170 this.name = "IDENTITY_" + match + "_" + replace; 171 this.rowSymbols = new HashMap<Symbol, Integer>(); 172 this.colSymbols = new HashMap<Symbol, Integer>(); 173 this.matrix = new short[alpha.size()][alpha.size()]; 174 175 Symbol[] sym = new Symbol[alpha.size()]; 176 Iterator<Symbol> iter = alpha.iterator(); 177 178 for (i = 0; iter.hasNext(); i++) { 179 sym[i] = iter.next(); 180 rowSymbols.put(sym[i], new Integer(i)); 181 colSymbols.put(sym[i], new Integer(i)); 182 } 183 184 for (i = 0; i < alphabet.size(); i++) 185 for (j = 0; j < alphabet.size(); j++) 186 if (sym[i].getMatches().contains(sym[j])) 187 matrix[i][j] = match; 188 else 189 matrix[i][j] = replace; 190 191 // this.printMatrix(); 192 } 193 194 /** 195 * This constructor can be used to guess the alphabet of this substitution 196 * matrix. However, it is recommended to apply another constructor if the 197 * alphabet is known. 198 * 199 * @param file 200 * A file containing a substitution matrix. 201 * @throws NumberFormatException 202 * @throws NoSuchElementException 203 * @throws BioException 204 * @throws IOException 205 */ 206 public SubstitutionMatrix(File file) throws NumberFormatException, 207 NoSuchElementException, BioException, IOException { 208 this(guessAlphabet(file), file); 209 } 210 211 /** 212 * This constructor can be used to guess the alphabet of this substitution 213 * matrix. However, it is recommended to apply another constructor if the 214 * alphabet is known. 215 * 216 * @param reader 217 * @throws NumberFormatException 218 * @throws BioException 219 * @throws IOException 220 */ 221 public static SubstitutionMatrix getSubstitutionMatrix(BufferedReader reader) 222 throws NumberFormatException, BioException, IOException { 223 StringBuffer stringMatrix = new StringBuffer(""); 224 while (reader.ready()) { 225 stringMatrix.append(reader.readLine()); 226 stringMatrix.append(newLine); 227 } 228 reader.close(); 229 String mat = stringMatrix.toString(); 230 FiniteAlphabet alpha = guessAlphabet(new BufferedReader( 231 new StringReader(mat))); 232 SubstitutionMatrix matrix = new SubstitutionMatrix(alpha, mat, 233 "unknown"); 234 return matrix; 235 } 236 237 /** 238 * Return a new substitution matrix with the specified alphabet. 239 * 240 * @param alphabet alphabet, must not be null 241 * @param reader reader, must not be null 242 * @return a new substitution matrix with the specified alphabet 243 * @throws BioException if an error occurs 244 * @throws IOException if an I/O error occurs 245 */ 246 public static SubstitutionMatrix getSubstitutionMatrix(final FiniteAlphabet alphabet, final BufferedReader reader) throws BioException, IOException { 247 if (alphabet == null) { 248 throw new NullPointerException("alphabet must not be null"); 249 } 250 if (reader == null) { 251 throw new NullPointerException("reader must not be null"); 252 } 253 return new SubstitutionMatrix(alphabet, toString(reader), "unknown"); 254 } 255 256 private static String toString(final BufferedReader reader) throws IOException { 257 StringBuilder sb = new StringBuilder(); 258 try { 259 while (reader.ready()) { 260 String line = reader.readLine(); 261 if (line == null) { 262 break; 263 } 264 sb.append(line); 265 sb.append(newLine); 266 } 267 return sb.toString(); 268 } 269 finally { 270 try { 271 reader.close(); 272 } 273 catch (Exception e) { 274 // ignore 275 } 276 } 277 } 278 279 /** 280 * Return a new substitution matrix with the specified alphabet and name. 281 * 282 * @param alphabet alphabet, must not be null 283 * @param reader reader, must not be null 284 * @param name name, must not be null 285 * @return a new substitution matrix with the specified alphabet and name 286 * @throws BioException if an error occurs 287 * @throws IOException if an I/O error occurs 288 */ 289 public static SubstitutionMatrix getSubstitutionMatrix(final FiniteAlphabet alphabet, final BufferedReader reader, final String name) throws BioException, IOException { 290 if (alphabet == null) { 291 throw new NullPointerException("alphabet must not be null"); 292 } 293 if (reader == null) { 294 throw new NullPointerException("reader must not be null"); 295 } 296 if (name == null) { 297 throw new NullPointerException("name must not be null"); 298 } 299 return new SubstitutionMatrix(alphabet, toString(reader), name); 300 } 301 302 private static BufferedReader readResource(final String name) { 303 return new BufferedReader(new InputStreamReader(SubstitutionMatrix.class.getResourceAsStream(name))); 304 } 305 306 private static SubstitutionMatrix getNucleotideMatrix(final String name) { 307 try { 308 return getSubstitutionMatrix(DNATools.getDNA(), readResource(name), name); 309 } 310 catch (BioException e) { 311 throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e); 312 } 313 catch (IOException e) { 314 throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e); 315 } 316 } 317 318 private static SubstitutionMatrix getAminoAcidMatrix(final String name) { 319 try { 320 return getSubstitutionMatrix(ProteinTools.getTAlphabet(), readResource(name), name); 321 } 322 catch (BioException e) { 323 throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e); 324 } 325 catch (IOException e) { 326 throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e); 327 } 328 } 329 330 331 /** 332 * Return the <code>BLOSUM100</code> amino acid substitution matrix. 333 * 334 * @return the <code>BLOSUM100</code> amino acid substitution matrix 335 */ 336 public static SubstitutionMatrix getBlosum100() { 337 return getAminoAcidMatrix("BLOSUM100"); 338 } 339 340 /** 341 * Return the <code>BLOSUM100.50</code> amino acid substitution matrix. 342 * 343 * @return the <code>BLOSUM100.50</code> amino acid substitution matrix 344 */ 345 public static SubstitutionMatrix getBlosum100_50() { 346 return getAminoAcidMatrix("BLOSUM100.50"); 347 } 348 349 /** 350 * Return the <code>BLOSUM30</code> amino acid substitution matrix. 351 * 352 * @return the <code>BLOSUM30</code> amino acid substitution matrix 353 */ 354 public static SubstitutionMatrix getBlosum30() { 355 return getAminoAcidMatrix("BLOSUM30"); 356 } 357 358 /** 359 * Return the <code>BLOSUM30.50</code> amino acid substitution matrix. 360 * 361 * @return the <code>BLOSUM30.50</code> amino acid substitution matrix 362 */ 363 public static SubstitutionMatrix getBlosum30_50() { 364 return getAminoAcidMatrix("BLOSUM30.50"); 365 } 366 367 /** 368 * Return the <code>BLOSUM35</code> amino acid substitution matrix. 369 * 370 * @return the <code>BLOSUM35</code> amino acid substitution matrix 371 */ 372 public static SubstitutionMatrix getBlosum35() { 373 return getAminoAcidMatrix("BLOSUM35"); 374 } 375 376 /** 377 * Return the <code>BLOSUM35.50</code> amino acid substitution matrix. 378 * 379 * @return the <code>BLOSUM35.50</code> amino acid substitution matrix 380 */ 381 public static SubstitutionMatrix getBlosum35_50() { 382 return getAminoAcidMatrix("BLOSUM35.50"); 383 } 384 385 /** 386 * Return the <code>BLOSUM40</code> amino acid substitution matrix. 387 * 388 * @return the <code>BLOSUM40</code> amino acid substitution matrix 389 */ 390 public static SubstitutionMatrix getBlosum40() { 391 return getAminoAcidMatrix("BLOSUM40"); 392 } 393 394 /** 395 * Return the <code>BLOSUM40.50</code> amino acid substitution matrix. 396 * 397 * @return the <code>BLOSUM40.50</code> amino acid substitution matrix 398 */ 399 public static SubstitutionMatrix getBlosum40_50() { 400 return getAminoAcidMatrix("BLOSUM40.50"); 401 } 402 403 /** 404 * Return the <code>BLOSUM45</code> amino acid substitution matrix. 405 * 406 * @return the <code>BLOSUM45</code> amino acid substitution matrix 407 */ 408 public static SubstitutionMatrix getBlosum45() { 409 return getAminoAcidMatrix("BLOSUM45"); 410 } 411 412 /** 413 * Return the <code>BLOSUM45.50</code> amino acid substitution matrix. 414 * 415 * @return the <code>BLOSUM45.50</code> amino acid substitution matrix 416 */ 417 public static SubstitutionMatrix getBlosum45_50() { 418 return getAminoAcidMatrix("BLOSUM45.50"); 419 } 420 421 /** 422 * Return the <code>BLOSUM50</code> amino acid substitution matrix. 423 * 424 * @return the <code>BLOSUM50</code> amino acid substitution matrix 425 */ 426 public static SubstitutionMatrix getBlosum50() { 427 return getAminoAcidMatrix("BLOSUM50"); 428 } 429 430 /** 431 * Return the <code>BLOSUM50.50</code> amino acid substitution matrix. 432 * 433 * @return the <code>BLOSUM50.50</code> amino acid substitution matrix 434 */ 435 public static SubstitutionMatrix getBlosum50_50() { 436 return getAminoAcidMatrix("BLOSUM50.50"); 437 } 438 439 /** 440 * Return the <code>BLOSUM55</code> amino acid substitution matrix. 441 * 442 * @return the <code>BLOSUM55</code> amino acid substitution matrix 443 */ 444 public static SubstitutionMatrix getBlosum55() { 445 return getAminoAcidMatrix("BLOSUM55"); 446 } 447 448 /** 449 * Return the <code>BLOSUM55.50</code> amino acid substitution matrix. 450 * 451 * @return the <code>BLOSUM55.50</code> amino acid substitution matrix 452 */ 453 public static SubstitutionMatrix getBlosum55_50() { 454 return getAminoAcidMatrix("BLOSUM55.50"); 455 } 456 457 /** 458 * Return the <code>BLOSUM60</code> amino acid substitution matrix. 459 * 460 * @return the <code>BLOSUM60</code> amino acid substitution matrix 461 */ 462 public static SubstitutionMatrix getBlosum60() { 463 return getAminoAcidMatrix("BLOSUM60"); 464 } 465 466 /** 467 * Return the <code>BLOSUM60.50</code> amino acid substitution matrix. 468 * 469 * @return the <code>BLOSUM60.50</code> amino acid substitution matrix 470 */ 471 public static SubstitutionMatrix getBlosum60_50() { 472 return getAminoAcidMatrix("BLOSUM60.50"); 473 } 474 475 /** 476 * Return the <code>BLOSUM62</code> amino acid substitution matrix. 477 * 478 * @return the <code>BLOSUM62</code> amino acid substitution matrix 479 */ 480 public static SubstitutionMatrix getBlosum62() { 481 return getAminoAcidMatrix("BLOSUM62"); 482 } 483 484 /** 485 * Return the <code>BLOSUM62.50</code> amino acid substitution matrix. 486 * 487 * @return the <code>BLOSUM62.50</code> amino acid substitution matrix 488 */ 489 public static SubstitutionMatrix getBlosum62_50() { 490 return getAminoAcidMatrix("BLOSUM62.50"); 491 } 492 493 /** 494 * Return the <code>BLOSUM65</code> amino acid substitution matrix. 495 * 496 * @return the <code>BLOSUM65</code> amino acid substitution matrix 497 */ 498 public static SubstitutionMatrix getBlosum65() { 499 return getAminoAcidMatrix("BLOSUM65"); 500 } 501 502 /** 503 * Return the <code>BLOSUM65.50</code> amino acid substitution matrix. 504 * 505 * @return the <code>BLOSUM65.50</code> amino acid substitution matrix 506 */ 507 public static SubstitutionMatrix getBlosum65_50() { 508 return getAminoAcidMatrix("BLOSUM65.50"); 509 } 510 511 /** 512 * Return the <code>BLOSUM70</code> amino acid substitution matrix. 513 * 514 * @return the <code>BLOSUM70</code> amino acid substitution matrix 515 */ 516 public static SubstitutionMatrix getBlosum70() { 517 return getAminoAcidMatrix("BLOSUM70"); 518 } 519 520 /** 521 * Return the <code>BLOSUM70.50</code> amino acid substitution matrix. 522 * 523 * @return the <code>BLOSUM70.50</code> amino acid substitution matrix 524 */ 525 public static SubstitutionMatrix getBlosum70_50() { 526 return getAminoAcidMatrix("BLOSUM70.50"); 527 } 528 529 /** 530 * Return the <code>BLOSUM75</code> amino acid substitution matrix. 531 * 532 * @return the <code>BLOSUM75</code> amino acid substitution matrix 533 */ 534 public static SubstitutionMatrix getBlosum75() { 535 return getAminoAcidMatrix("BLOSUM75"); 536 } 537 538 /** 539 * Return the <code>BLOSUM75.50</code> amino acid substitution matrix. 540 * 541 * @return the <code>BLOSUM75.50</code> amino acid substitution matrix 542 */ 543 public static SubstitutionMatrix getBlosum75_50() { 544 return getAminoAcidMatrix("BLOSUM75.50"); 545 } 546 547 /** 548 * Return the <code>BLOSUM80</code> amino acid substitution matrix. 549 * 550 * @return the <code>BLOSUM80</code> amino acid substitution matrix 551 */ 552 public static SubstitutionMatrix getBlosum80() { 553 return getAminoAcidMatrix("BLOSUM80"); 554 } 555 556 /** 557 * Return the <code>BLOSUM80.50</code> amino acid substitution matrix. 558 * 559 * @return the <code>BLOSUM80.50</code> amino acid substitution matrix 560 */ 561 public static SubstitutionMatrix getBlosum80_50() { 562 return getAminoAcidMatrix("BLOSUM80.50"); 563 } 564 565 /** 566 * Return the <code>BLOSUM85</code> amino acid substitution matrix. 567 * 568 * @return the <code>BLOSUM85</code> amino acid substitution matrix 569 */ 570 public static SubstitutionMatrix getBlosum85() { 571 return getAminoAcidMatrix("BLOSUM85"); 572 } 573 574 /** 575 * Return the <code>BLOSUM85.50</code> amino acid substitution matrix. 576 * 577 * @return the <code>BLOSUM85.50</code> amino acid substitution matrix 578 */ 579 public static SubstitutionMatrix getBlosum85_50() { 580 return getAminoAcidMatrix("BLOSUM85.50"); 581 } 582 583 /** 584 * Return the <code>BLOSUM90</code> amino acid substitution matrix. 585 * 586 * @return the <code>BLOSUM90</code> amino acid substitution matrix 587 */ 588 public static SubstitutionMatrix getBlosum90() { 589 return getAminoAcidMatrix("BLOSUM90"); 590 } 591 592 /** 593 * Return the <code>BLOSUM90.50</code> amino acid substitution matrix. 594 * 595 * @return the <code>BLOSUM90.50</code> amino acid substitution matrix 596 */ 597 public static SubstitutionMatrix getBlosum90_50() { 598 return getAminoAcidMatrix("BLOSUM90.50"); 599 } 600 601 /** 602 * Return the <code>BLOSUMN</code> amino acid substitution matrix. 603 * 604 * @return the <code>BLOSUMN</code> amino acid substitution matrix 605 */ 606 public static SubstitutionMatrix getBlosumn() { 607 return getAminoAcidMatrix("BLOSUMN"); 608 } 609 610 /** 611 * Return the <code>BLOSUMN.50</code> amino acid substitution matrix. 612 * 613 * @return the <code>BLOSUMN.50</code> amino acid substitution matrix 614 */ 615 public static SubstitutionMatrix getBlosumn_50() { 616 return getAminoAcidMatrix("BLOSUMN.50"); 617 } 618 619 /** 620 * Return the <code>DAYHOFF</code> amino acid substitution matrix. 621 * 622 * @return the <code>DAYHOFF</code> amino acid substitution matrix 623 */ 624 public static SubstitutionMatrix getDayhoff() { 625 return getAminoAcidMatrix("DAYHOFF"); 626 } 627 628 /** 629 * Return the <code>GONNET</code> amino acid substitution matrix. 630 * 631 * @return the <code>GONNET</code> amino acid substitution matrix 632 */ 633 public static SubstitutionMatrix getGonnet() { 634 return getAminoAcidMatrix("GONNET"); 635 } 636 637 /** 638 * Return the <code>IDENTITY</code> amino acid substitution matrix. 639 * 640 * @return the <code>IDENTITY</code> amino acid substitution matrix 641 */ 642 public static SubstitutionMatrix getIdentity() { 643 return getAminoAcidMatrix("IDENTITY"); 644 } 645 646 /** 647 * Return the <code>MATCH</code> amino acid substitution matrix. 648 * 649 * @return the <code>MATCH</code> amino acid substitution matrix 650 */ 651 public static SubstitutionMatrix getMatch() { 652 return getAminoAcidMatrix("MATCH"); 653 } 654 655 /** 656 * Return the <code>NUC.4.2</code> nucleotide substitution matrix. 657 * 658 * @return the <code>NUC.4.2</code> nucleotide substitution matrix 659 */ 660 public static SubstitutionMatrix getNuc4_2() { 661 return getNucleotideMatrix("NUC.4.2"); 662 } 663 664 /** 665 * Return the <code>NUC.4.4</code> nucleotide substitution matrix. 666 * 667 * @return the <code>NUC.4.4</code> nucleotide substitution matrix 668 */ 669 public static SubstitutionMatrix getNuc4_4() { 670 return getNucleotideMatrix("NUC.4.4"); 671 } 672 673 /** 674 * Return the <code>PAM10</code> amino acid substitution matrix. 675 * 676 * @return the <code>PAM10</code> amino acid substitution matrix 677 */ 678 public static SubstitutionMatrix getPam10() { 679 return getAminoAcidMatrix("PAM10"); 680 } 681 682 /** 683 * Return the <code>PAM100</code> amino acid substitution matrix. 684 * 685 * @return the <code>PAM100</code> amino acid substitution matrix 686 */ 687 public static SubstitutionMatrix getPam100() { 688 return getAminoAcidMatrix("PAM100"); 689 } 690 691 /** 692 * Return the <code>PAM110</code> amino acid substitution matrix. 693 * 694 * @return the <code>PAM110</code> amino acid substitution matrix 695 */ 696 public static SubstitutionMatrix getPam110() { 697 return getAminoAcidMatrix("PAM110"); 698 } 699 700 /** 701 * Return the <code>PAM120</code> amino acid substitution matrix. 702 * 703 * @return the <code>PAM120</code> amino acid substitution matrix 704 */ 705 public static SubstitutionMatrix getPam120() { 706 return getAminoAcidMatrix("PAM120"); 707 } 708 709 /** 710 * Return the <code>PAM130</code> amino acid substitution matrix. 711 * 712 * @return the <code>PAM130</code> amino acid substitution matrix 713 */ 714 public static SubstitutionMatrix getPam130() { 715 return getAminoAcidMatrix("PAM130"); 716 } 717 718 /** 719 * Return the <code>PAM140</code> amino acid substitution matrix. 720 * 721 * @return the <code>PAM140</code> amino acid substitution matrix 722 */ 723 public static SubstitutionMatrix getPam140() { 724 return getAminoAcidMatrix("PAM140"); 725 } 726 727 /** 728 * Return the <code>PAM150</code> amino acid substitution matrix. 729 * 730 * @return the <code>PAM150</code> amino acid substitution matrix 731 */ 732 public static SubstitutionMatrix getPam150() { 733 return getAminoAcidMatrix("PAM150"); 734 } 735 736 /** 737 * Return the <code>PAM160</code> amino acid substitution matrix. 738 * 739 * @return the <code>PAM160</code> amino acid substitution matrix 740 */ 741 public static SubstitutionMatrix getPam160() { 742 return getAminoAcidMatrix("PAM160"); 743 } 744 745 /** 746 * Return the <code>PAM170</code> amino acid substitution matrix. 747 * 748 * @return the <code>PAM170</code> amino acid substitution matrix 749 */ 750 public static SubstitutionMatrix getPam170() { 751 return getAminoAcidMatrix("PAM170"); 752 } 753 754 /** 755 * Return the <code>PAM180</code> amino acid substitution matrix. 756 * 757 * @return the <code>PAM180</code> amino acid substitution matrix 758 */ 759 public static SubstitutionMatrix getPam180() { 760 return getAminoAcidMatrix("PAM180"); 761 } 762 763 /** 764 * Return the <code>PAM190</code> amino acid substitution matrix. 765 * 766 * @return the <code>PAM190</code> amino acid substitution matrix 767 */ 768 public static SubstitutionMatrix getPam190() { 769 return getAminoAcidMatrix("PAM190"); 770 } 771 772 /** 773 * Return the <code>PAM20</code> amino acid substitution matrix. 774 * 775 * @return the <code>PAM20</code> amino acid substitution matrix 776 */ 777 public static SubstitutionMatrix getPam20() { 778 return getAminoAcidMatrix("PAM20"); 779 } 780 781 /** 782 * Return the <code>PAM200</code> amino acid substitution matrix. 783 * 784 * @return the <code>PAM200</code> amino acid substitution matrix 785 */ 786 public static SubstitutionMatrix getPam200() { 787 return getAminoAcidMatrix("PAM200"); 788 } 789 790 /** 791 * Return the <code>PAM210</code> amino acid substitution matrix. 792 * 793 * @return the <code>PAM210</code> amino acid substitution matrix 794 */ 795 public static SubstitutionMatrix getPam210() { 796 return getAminoAcidMatrix("PAM210"); 797 } 798 799 /** 800 * Return the <code>PAM220</code> amino acid substitution matrix. 801 * 802 * @return the <code>PAM220</code> amino acid substitution matrix 803 */ 804 public static SubstitutionMatrix getPam220() { 805 return getAminoAcidMatrix("PAM220"); 806 } 807 808 /** 809 * Return the <code>PAM230</code> amino acid substitution matrix. 810 * 811 * @return the <code>PAM230</code> amino acid substitution matrix 812 */ 813 public static SubstitutionMatrix getPam230() { 814 return getAminoAcidMatrix("PAM230"); 815 } 816 817 /** 818 * Return the <code>PAM240</code> amino acid substitution matrix. 819 * 820 * @return the <code>PAM240</code> amino acid substitution matrix 821 */ 822 public static SubstitutionMatrix getPam240() { 823 return getAminoAcidMatrix("PAM240"); 824 } 825 826 /** 827 * Return the <code>PAM250</code> amino acid substitution matrix. 828 * 829 * @return the <code>PAM250</code> amino acid substitution matrix 830 */ 831 public static SubstitutionMatrix getPam250() { 832 return getAminoAcidMatrix("PAM250"); 833 } 834 835 /** 836 * Return the <code>PAM260</code> amino acid substitution matrix. 837 * 838 * @return the <code>PAM260</code> amino acid substitution matrix 839 */ 840 public static SubstitutionMatrix getPam260() { 841 return getAminoAcidMatrix("PAM260"); 842 } 843 844 /** 845 * Return the <code>PAM270</code> amino acid substitution matrix. 846 * 847 * @return the <code>PAM270</code> amino acid substitution matrix 848 */ 849 public static SubstitutionMatrix getPam270() { 850 return getAminoAcidMatrix("PAM270"); 851 } 852 853 /** 854 * Return the <code>PAM280</code> amino acid substitution matrix. 855 * 856 * @return the <code>PAM280</code> amino acid substitution matrix 857 */ 858 public static SubstitutionMatrix getPam280() { 859 return getAminoAcidMatrix("PAM280"); 860 } 861 862 /** 863 * Return the <code>PAM290</code> amino acid substitution matrix. 864 * 865 * @return the <code>PAM290</code> amino acid substitution matrix 866 */ 867 public static SubstitutionMatrix getPam290() { 868 return getAminoAcidMatrix("PAM290"); 869 } 870 871 /** 872 * Return the <code>PAM30</code> amino acid substitution matrix. 873 * 874 * @return the <code>PAM30</code> amino acid substitution matrix 875 */ 876 public static SubstitutionMatrix getPam30() { 877 return getAminoAcidMatrix("PAM30"); 878 } 879 880 /** 881 * Return the <code>PAM300</code> amino acid substitution matrix. 882 * 883 * @return the <code>PAM300</code> amino acid substitution matrix 884 */ 885 public static SubstitutionMatrix getPam300() { 886 return getAminoAcidMatrix("PAM300"); 887 } 888 889 /** 890 * Return the <code>PAM310</code> amino acid substitution matrix. 891 * 892 * @return the <code>PAM310</code> amino acid substitution matrix 893 */ 894 public static SubstitutionMatrix getPam310() { 895 return getAminoAcidMatrix("PAM310"); 896 } 897 898 /** 899 * Return the <code>PAM320</code> amino acid substitution matrix. 900 * 901 * @return the <code>PAM320</code> amino acid substitution matrix 902 */ 903 public static SubstitutionMatrix getPam320() { 904 return getAminoAcidMatrix("PAM320"); 905 } 906 907 /** 908 * Return the <code>PAM330</code> amino acid substitution matrix. 909 * 910 * @return the <code>PAM330</code> amino acid substitution matrix 911 */ 912 public static SubstitutionMatrix getPam330() { 913 return getAminoAcidMatrix("PAM330"); 914 } 915 916 /** 917 * Return the <code>PAM340</code> amino acid substitution matrix. 918 * 919 * @return the <code>PAM340</code> amino acid substitution matrix 920 */ 921 public static SubstitutionMatrix getPam340() { 922 return getAminoAcidMatrix("PAM340"); 923 } 924 925 /** 926 * Return the <code>PAM350</code> amino acid substitution matrix. 927 * 928 * @return the <code>PAM350</code> amino acid substitution matrix 929 */ 930 public static SubstitutionMatrix getPam350() { 931 return getAminoAcidMatrix("PAM350"); 932 } 933 934 /** 935 * Return the <code>PAM360</code> amino acid substitution matrix. 936 * 937 * @return the <code>PAM360</code> amino acid substitution matrix 938 */ 939 public static SubstitutionMatrix getPam360() { 940 return getAminoAcidMatrix("PAM360"); 941 } 942 943 /** 944 * Return the <code>PAM370</code> amino acid substitution matrix. 945 * 946 * @return the <code>PAM370</code> amino acid substitution matrix 947 */ 948 public static SubstitutionMatrix getPam370() { 949 return getAminoAcidMatrix("PAM370"); 950 } 951 952 /** 953 * Return the <code>PAM380</code> amino acid substitution matrix. 954 * 955 * @return the <code>PAM380</code> amino acid substitution matrix 956 */ 957 public static SubstitutionMatrix getPam380() { 958 return getAminoAcidMatrix("PAM380"); 959 } 960 961 /** 962 * Return the <code>PAM390</code> amino acid substitution matrix. 963 * 964 * @return the <code>PAM390</code> amino acid substitution matrix 965 */ 966 public static SubstitutionMatrix getPam390() { 967 return getAminoAcidMatrix("PAM390"); 968 } 969 970 /** 971 * Return the <code>PAM40</code> amino acid substitution matrix. 972 * 973 * @return the <code>PAM40</code> amino acid substitution matrix 974 */ 975 public static SubstitutionMatrix getPam40() { 976 return getAminoAcidMatrix("PAM40"); 977 } 978 979 /** 980 * Return the <code>PAM400</code> amino acid substitution matrix. 981 * 982 * @return the <code>PAM400</code> amino acid substitution matrix 983 */ 984 public static SubstitutionMatrix getPam400() { 985 return getAminoAcidMatrix("PAM400"); 986 } 987 988 /** 989 * Return the <code>PAM410</code> amino acid substitution matrix. 990 * 991 * @return the <code>PAM410</code> amino acid substitution matrix 992 */ 993 public static SubstitutionMatrix getPam410() { 994 return getAminoAcidMatrix("PAM410"); 995 } 996 997 /** 998 * Return the <code>PAM420</code> amino acid substitution matrix. 999 * 1000 * @return the <code>PAM420</code> amino acid substitution matrix 1001 */ 1002 public static SubstitutionMatrix getPam420() { 1003 return getAminoAcidMatrix("PAM420"); 1004 } 1005 1006 /** 1007 * Return the <code>PAM430</code> amino acid substitution matrix. 1008 * 1009 * @return the <code>PAM430</code> amino acid substitution matrix 1010 */ 1011 public static SubstitutionMatrix getPam430() { 1012 return getAminoAcidMatrix("PAM430"); 1013 } 1014 1015 /** 1016 * Return the <code>PAM440</code> amino acid substitution matrix. 1017 * 1018 * @return the <code>PAM440</code> amino acid substitution matrix 1019 */ 1020 public static SubstitutionMatrix getPam440() { 1021 return getAminoAcidMatrix("PAM440"); 1022 } 1023 1024 /** 1025 * Return the <code>PAM450</code> amino acid substitution matrix. 1026 * 1027 * @return the <code>PAM450</code> amino acid substitution matrix 1028 */ 1029 public static SubstitutionMatrix getPam450() { 1030 return getAminoAcidMatrix("PAM450"); 1031 } 1032 1033 /** 1034 * Return the <code>PAM460</code> amino acid substitution matrix. 1035 * 1036 * @return the <code>PAM460</code> amino acid substitution matrix 1037 */ 1038 public static SubstitutionMatrix getPam460() { 1039 return getAminoAcidMatrix("PAM460"); 1040 } 1041 1042 /** 1043 * Return the <code>PAM470</code> amino acid substitution matrix. 1044 * 1045 * @return the <code>PAM470</code> amino acid substitution matrix 1046 */ 1047 public static SubstitutionMatrix getPam470() { 1048 return getAminoAcidMatrix("PAM470"); 1049 } 1050 1051 /** 1052 * Return the <code>PAM480</code> amino acid substitution matrix. 1053 * 1054 * @return the <code>PAM480</code> amino acid substitution matrix 1055 */ 1056 public static SubstitutionMatrix getPam480() { 1057 return getAminoAcidMatrix("PAM480"); 1058 } 1059 1060 /** 1061 * Return the <code>PAM490</code> amino acid substitution matrix. 1062 * 1063 * @return the <code>PAM490</code> amino acid substitution matrix 1064 */ 1065 public static SubstitutionMatrix getPam490() { 1066 return getAminoAcidMatrix("PAM490"); 1067 } 1068 1069 /** 1070 * Return the <code>PAM50</code> amino acid substitution matrix. 1071 * 1072 * @return the <code>PAM50</code> amino acid substitution matrix 1073 */ 1074 public static SubstitutionMatrix getPam50() { 1075 return getAminoAcidMatrix("PAM50"); 1076 } 1077 1078 /** 1079 * Return the <code>PAM500</code> amino acid substitution matrix. 1080 * 1081 * @return the <code>PAM500</code> amino acid substitution matrix 1082 */ 1083 public static SubstitutionMatrix getPam500() { 1084 return getAminoAcidMatrix("PAM500"); 1085 } 1086 1087 /** 1088 * Return the <code>PAM60</code> amino acid substitution matrix. 1089 * 1090 * @return the <code>PAM60</code> amino acid substitution matrix 1091 */ 1092 public static SubstitutionMatrix getPam60() { 1093 return getAminoAcidMatrix("PAM60"); 1094 } 1095 1096 /** 1097 * Return the <code>PAM70</code> amino acid substitution matrix. 1098 * 1099 * @return the <code>PAM70</code> amino acid substitution matrix 1100 */ 1101 public static SubstitutionMatrix getPam70() { 1102 return getAminoAcidMatrix("PAM70"); 1103 } 1104 1105 /** 1106 * Return the <code>PAM80</code> amino acid substitution matrix. 1107 * 1108 * @return the <code>PAM80</code> amino acid substitution matrix 1109 */ 1110 public static SubstitutionMatrix getPam80() { 1111 return getAminoAcidMatrix("PAM80"); 1112 } 1113 1114 /** 1115 * Return the <code>PAM90</code> amino acid substitution matrix. 1116 * 1117 * @return the <code>PAM90</code> amino acid substitution matrix 1118 */ 1119 public static SubstitutionMatrix getPam90() { 1120 return getAminoAcidMatrix("PAM90"); 1121 } 1122 1123 /** 1124 * This method tries to identify the alphabet within a matrix file. This is 1125 * necessary in cases where we do not know if this is a matrix for DNA, RNA 1126 * or PROTEIN/PROTEIN-TERM. 1127 * 1128 * @param file 1129 * @return 1130 * @throws IOException 1131 * @throws BioException 1132 * @throws NoSuchElementException 1133 * @throws BioException 1134 */ 1135 private static FiniteAlphabet guessAlphabet(File file) throws IOException, 1136 NoSuchElementException, BioException { 1137 String fileName = file.getName().toLowerCase(); 1138 if (fileName.contains("pam") || fileName.contains("blosum")) 1139 return (FiniteAlphabet) AlphabetManager 1140 .alphabetForName("PROTEIN-TERM"); 1141 return guessAlphabet(new BufferedReader(new FileReader(file))); 1142 } 1143 1144 /** 1145 * This method guesses the alphabet of the given substituttion matrix which 1146 * is required for the parser. 1147 * 1148 * @param reader 1149 * @return 1150 * @throws IOException 1151 * @throws BioException 1152 */ 1153 private static FiniteAlphabet guessAlphabet(BufferedReader reader) 1154 throws IOException, BioException { 1155 String line, trim; 1156 FiniteAlphabet alphabet = null; 1157 while (reader.ready()) { 1158 line = reader.readLine(); 1159 if (line == null) 1160 break; 1161 trim = line.trim(); 1162 if (trim.length()==0 || trim.charAt(0) == '#') 1163 continue; 1164 // Use line in next if-clause because trim will have lost leading important whitespace. 1165 else if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) { 1166 String alphabets[] = new String[] { "DNA", "RNA", "PROTEIN", 1167 "PROTEIN-TERM" }; 1168 SymbolTokenization symtok; 1169 for (int i = 0; i < alphabets.length; i++) { 1170 alphabet = (FiniteAlphabet) AlphabetManager 1171 .alphabetForName(alphabets[i]); 1172 symtok = alphabet.getTokenization("token"); 1173 StringTokenizer st = new StringTokenizer(trim); 1174 boolean noError = true; 1175 for (int j = 0; st.hasMoreElements(); j++) 1176 try { 1177 symtok.parseToken(st.nextElement().toString()); 1178 } catch (IllegalSymbolException exc) { 1179 noError = false; 1180 break; 1181 } 1182 if (noError) 1183 return alphabet; 1184 } 1185 } 1186 } 1187 throw new BioException( 1188 "Unknow alphabet used in this substitution matrix"); 1189 } 1190 1191 /** 1192 * Reads a String representing the contents of a substitution matrix file. 1193 * 1194 * @param matrixObj 1195 * @return matrix 1196 * @throws BioException 1197 * @throws IOException 1198 * @throws NumberFormatException 1199 */ 1200 private short[][] parseMatrix(Object matrixObj) throws BioException, 1201 NumberFormatException, IOException { 1202 int j = 0, rows = 0, cols = 0; 1203 SymbolTokenization symtok = alphabet.getTokenization("token"); 1204 StringTokenizer st; 1205 String line, trim; 1206 1207 this.min = Short.MAX_VALUE; 1208 this.max = Short.MIN_VALUE; 1209 /* 1210 * First: count how many elements are in the matrix fill lines and rows 1211 */ 1212 Reader reader; 1213 if (matrixObj instanceof File) 1214 reader = new FileReader((File) matrixObj); 1215 else if (matrixObj instanceof String) 1216 reader = new StringReader(matrixObj.toString()); 1217 else 1218 return null; 1219 BufferedReader br = new BufferedReader(reader); 1220 1221 while (br.ready()) { 1222 line = br.readLine(); 1223 if (line == null) 1224 break; 1225 trim = line.trim(); 1226 if (trim.length() == 0) 1227 continue; 1228 if (trim.charAt(0) == '#') { 1229 description += line.substring(1); 1230 continue; 1231 } 1232 // Use line in next if-clause because trim will have lost leading important whitespace. 1233 else if (!line.startsWith(newLine)) { 1234 if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) { 1235 st = new StringTokenizer(trim); 1236 for (j = 0; st.hasMoreElements(); j++) { 1237 colSymbols.put(symtok.parseToken(st.nextElement() 1238 .toString()), Integer.valueOf(j)); 1239 } 1240 cols = j; 1241 } else { 1242 // the matrix. 1243 st = new StringTokenizer(trim); 1244 if (st.hasMoreElements()) 1245 rowSymbols.put(symtok.parseToken(st.nextElement() 1246 .toString()), Integer.valueOf(rows++)); 1247 } 1248 } 1249 } 1250 br.close(); 1251 1252 short[][] matrix = new short[rows][cols]; 1253 1254 rows = 0; 1255 if (matrixObj instanceof File) 1256 reader = new FileReader((File) matrixObj); 1257 else if (matrixObj instanceof String) 1258 reader = new StringReader(matrixObj.toString()); 1259 else 1260 return null; 1261 br = new BufferedReader(reader); 1262 1263 /* 1264 * Second reading. Fill the matrix. 1265 */ 1266 while (br.ready()) { 1267 line = br.readLine(); 1268 if (line == null) 1269 break; 1270 trim = line.trim(); 1271 if (trim.length() == 0 || trim.charAt(0) == '#') 1272 continue; 1273 else if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) 1274 continue; 1275 // Use line in next if-clause because trim will have lost leading important whitespace. 1276 else if (!line.startsWith(newLine)) { // lines: 1277 st = new StringTokenizer(trim); 1278 if (st.hasMoreElements()) 1279 st.nextElement(); // throw away Symbol at 1280 // beginning. 1281 for (j = 0; st.hasMoreElements(); j++) {// cols: 1282 matrix[rows][j] = (short) Math.round(Double.parseDouble(st 1283 .nextElement().toString())); 1284 if (matrix[rows][j] > max) 1285 max = matrix[rows][j]; // maximum. 1286 if (matrix[rows][j] < min) 1287 min = matrix[rows][j]; // minimum. 1288 } 1289 rows++; 1290 } 1291 } 1292 br.close(); 1293 1294 return matrix; 1295 } 1296 1297 /** 1298 * There are some substitution matrices containing more columns than lines. 1299 * This has to do with the ambiguous symbols. Lines are always good, columns 1300 * might not contain the whole information. The matrix is supposed to be 1301 * symmetric anyway, so you can always set the ambiguous symbol to be the 1302 * first argument. 1303 * 1304 * @param row 1305 * Symbol of the line 1306 * @param col 1307 * Symbol of the column 1308 * @return expenses for the exchange of symbol row and symbol column. 1309 * @throws BioException 1310 */ 1311 public short getValueAt(Symbol row, Symbol col) throws BioException { 1312 if ((!rowSymbols.containsKey(row)) || (!colSymbols.containsKey(col))) { 1313 System.err.printf("SubstitutionMatrix: No entry for the symbols %s and %s\n", 1314 row.getName(), col.getName()); 1315 1316 // treat the two records as X: 1317 return 0; 1318 } 1319 return matrix[rowSymbols.get(row).intValue()][colSymbols.get(col) 1320 .intValue()]; 1321 } 1322 1323 /** 1324 * This gives you the description of this matrix if there is one. Normally 1325 * substitution matrix files like BLOSUM contain some lines of description. 1326 * 1327 * @return the comment of the matrix 1328 */ 1329 public String getDescription() { 1330 return description; 1331 } 1332 1333 /** 1334 * Every substitution matrix has a name like "BLOSUM30" or "PAM160". This 1335 * will be returned by this method. 1336 * 1337 * @return the name of the matrix. 1338 */ 1339 public String getName() { 1340 return name; 1341 } 1342 1343 /** 1344 * The minimum score of this matrix. 1345 * 1346 * @return minimum of the matrix. 1347 */ 1348 public short getMin() { 1349 return min; 1350 } 1351 1352 /** 1353 * The maximum score in this matrix. 1354 * 1355 * @return maximum of the matrix. 1356 */ 1357 public short getMax() { 1358 return max; 1359 } 1360 1361 /** 1362 * Sets the description to the given value. 1363 * 1364 * @param desc 1365 * a description. This doesn't have to start with '#'. 1366 */ 1367 public void setDescription(String desc) { 1368 this.description = desc; 1369 } 1370 1371 /** 1372 * Gives the alphabet used by this matrix. 1373 * 1374 * @return the alphabet of this matrix. 1375 */ 1376 public FiniteAlphabet getAlphabet() { 1377 return alphabet; 1378 } 1379 1380 /** 1381 * Creates a <code>String</code> representation of this matrix. 1382 * 1383 * @return a string representation of this matrix without the description. 1384 */ 1385 public String stringnifyMatrix() { 1386 int i = 0; 1387 StringBuffer matrixString = new StringBuffer(); 1388 Symbol[] colSyms = new Symbol[this.colSymbols.keySet().size()]; 1389 1390 try { 1391 SymbolTokenization symtok = alphabet.getTokenization("default"); 1392 matrixString.append(" "); 1393 Iterator<Symbol> colKeys = colSymbols.keySet().iterator(); 1394 while (colKeys.hasNext()) { 1395 colSyms[i] = colKeys.next(); 1396 matrixString.append(symtok.tokenizeSymbol(colSyms[i++]) 1397 .toUpperCase()); 1398 matrixString.append(' '); 1399 } 1400 matrixString.append(newLine); 1401 1402 Iterator<Symbol> rowKeys = rowSymbols.keySet().iterator(); 1403 while (rowKeys.hasNext()) { 1404 Symbol rowSym = rowKeys.next(); 1405 matrixString 1406 .append(symtok.tokenizeSymbol(rowSym).toUpperCase()); 1407 matrixString.append(' '); 1408 for (i = 0; i < colSyms.length; i++) { 1409 matrixString.append(getValueAt(rowSym, colSyms[i])); 1410 matrixString.append(' '); 1411 } 1412 matrixString.append(newLine); 1413 } 1414 } catch (BioException exc) { 1415 exc.printStackTrace(); 1416 } 1417 return matrixString.toString(); 1418 } 1419 1420 /** 1421 * Converts the description of the matrix to a String. 1422 * 1423 * @return Gives a description with approximately 60 letters on every line 1424 * separated by <code>System.getProperty("line.separator")</code>. 1425 * Every line starts with <code>#</code>. 1426 */ 1427 public String stringnifyDescription() { 1428 StringBuffer desc = new StringBuffer(), line = new StringBuffer(); 1429 line.append("# "); 1430 StringTokenizer st = new StringTokenizer(description, " "); 1431 while (st.hasMoreElements()) { 1432 line.append(st.nextElement().toString()); 1433 line.append(' '); 1434 if (line.length() >= 60) { 1435 desc.append(line); 1436 desc.append(newLine); 1437 if (st.hasMoreElements()) { 1438 line = new StringBuffer(); 1439 line.append("# "); 1440 } 1441 } else if (!st.hasMoreElements()) { 1442 desc.append(line); 1443 desc.append(newLine); 1444 } 1445 } 1446 return desc.toString(); 1447 } 1448 1449 /** 1450 * Overrides the inherited method. 1451 * 1452 * @return Gives a string representation of the SubstitutionMatrix. This is 1453 * a valid input for the constructor which needs a matrix string. 1454 * This String also contains the description of the matrix if there 1455 * is one. 1456 */ 1457 @Override 1458 public String toString() { 1459 StringBuffer desc = new StringBuffer(), line = new StringBuffer(); 1460 line.append("# "); 1461 StringTokenizer st = new StringTokenizer(description); 1462 while (st.hasMoreElements()) { 1463 line.append(st.nextElement().toString()); 1464 line.append(' '); 1465 if (line.length() >= 60) { 1466 desc.append(line); 1467 desc.append(newLine); 1468 if (st.hasMoreElements()) { 1469 line = new StringBuffer(); 1470 line.append("# "); 1471 } 1472 } else if (!st.hasMoreElements()) { 1473 desc.append(line); 1474 desc.append(newLine); 1475 } 1476 } 1477 desc.append(stringnifyMatrix()); 1478 return desc.toString(); 1479 } 1480 1481 /** 1482 * Just to perform some test. It prints the matrix on the screen. 1483 */ 1484 public void printMatrix() { 1485 // Test output: 1486 Iterator<Symbol> rowKeys = rowSymbols.keySet().iterator(); 1487 while (rowKeys.hasNext()) { 1488 Iterator<Symbol> colKeys = colSymbols.keySet().iterator(); 1489 Symbol rowSym = rowKeys.next(); 1490 System.out.print(rowSym.getName() + "\t"); 1491 while (colKeys.hasNext()) { 1492 Symbol colSym = colKeys.next(); 1493 int x = rowSymbols.get(rowSym).intValue(); 1494 int y = colSymbols.get(colSym).intValue(); 1495 System.out.print(colSym.getName() + " " + " " + x + " " + y 1496 + " " + matrix[x][y] + "\t"); 1497 } 1498 System.out.println(newLine); 1499 } 1500 System.out.println(toString()); 1501 } 1502 1503 /** 1504 * With this method you can get a “normalized” 1505 * <code>SubstitutionMatrix</code> object; however, since this 1506 * implementation uses an short matrix, the normalized matrix will be scaled 1507 * by ten. If you need values between zero and one, you have to divide every 1508 * value returned by <code>getValueAt</code> by ten. 1509 * 1510 * @return a new and normalized <code>SubstitutionMatrix</code> object given 1511 * by this substitution matrix. Because this uses an 1512 * <code>short</code> matrix, all values are scaled by 10. 1513 * @throws BioException 1514 * @throws IOException 1515 * @throws NumberFormatException 1516 */ 1517 public SubstitutionMatrix normalizeMatrix() throws BioException, 1518 NumberFormatException, IOException { 1519 int i, j; 1520 short min = getMin(), newMax = Short.MIN_VALUE; 1521 short[][] mat = new short[matrix.length][matrix[matrix.length - 1].length]; 1522 String name = getName() + "_normalized"; 1523 String matString = stringnifyDescription() + " "; 1524 FiniteAlphabet alphabet = getAlphabet(); 1525 Map<Symbol, Integer> rowMap = this.rowSymbols; 1526 Map<Symbol, Integer> colMap = this.colSymbols; 1527 SymbolTokenization symtok = alphabet.getTokenization("default"); 1528 1529 for (i = 0; i < matrix.length; i++) 1530 for (j = 0; j < matrix[matrix.length - 1].length; j++) { 1531 mat[i][j] = (short) (matrix[i][j] - min); 1532 if (mat[i][j] > newMax) 1533 newMax = mat[i][j]; 1534 } 1535 1536 for (i = 0; i < mat.length; i++) 1537 for (j = 0; j < mat[mat.length - 1].length; j++) 1538 mat[i][j] = (short) (mat[i][j] * 10 / newMax); 1539 1540 Object[] rows = rowSymbols.keySet().toArray(); 1541 Object[] cols = colSymbols.keySet().toArray(); 1542 for (i = 0; i < cols.length; i++) 1543 matString += symtok.tokenizeSymbol((Symbol) cols[i]) + " "; 1544 for (i = 0; i < rows.length; i++) { 1545 matString += newLine + symtok.tokenizeSymbol((Symbol) rows[i]) 1546 + " "; 1547 for (j = 0; j < cols.length; j++) { 1548 matString += mat[rowMap.get((Symbol) rows[i]).intValue()][colMap 1549 .get((Symbol) cols[j]).intValue()] 1550 + " "; 1551 } 1552 } 1553 matString += newLine; 1554 return new SubstitutionMatrix(alphabet, matString, name); 1555 } 1556 1557}