001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021/*
022 * Created on 2005-08-01
023 */
024
025package org.biojava.bio.alignment;
026
027import java.io.BufferedReader;
028import java.io.File;
029import java.io.FileReader;
030import java.io.IOException;
031import java.io.InputStreamReader;
032import java.io.Reader;
033import java.io.Serializable;
034import java.io.StringReader;
035import java.util.HashMap;
036import java.util.Iterator;
037import java.util.Map;
038import java.util.NoSuchElementException;
039import java.util.StringTokenizer;
040
041import org.biojava.bio.BioException;
042import org.biojava.bio.seq.DNATools;
043import org.biojava.bio.seq.ProteinTools;
044import org.biojava.bio.seq.io.SymbolTokenization;
045import org.biojava.bio.symbol.AlphabetManager;
046import org.biojava.bio.symbol.FiniteAlphabet;
047import org.biojava.bio.symbol.IllegalSymbolException;
048import org.biojava.bio.symbol.Symbol;
049
050/**
051 * <p>
 * This object reads a substitution matrix file and constructs a matrix of
 * <code>short</code> values in memory. Every single element of the matrix can
 * be accessed by the method <code>getValueAt</code> with the parameters being
 * two BioJava symbols, so it is not necessary to access the matrix directly.
 * If there is no value for the two specified <code>Symbol</code>s, an
 * <code>Exception</code> is thrown.
058 * </p>
059 * <p>
 * Substitution matrix files are available at <a
 * href="ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/">the NCBI FTP
 * directory</a>.
063 * </p>
064 * 
065 * @author Andreas Dr&auml;ger <andreas.draeger@uni-tuebingen.de>
066 */
067public class SubstitutionMatrix implements Serializable {
068        
069        /**
070         * 
071         */
072        private Map<Symbol, Integer> rowSymbols, colSymbols;
073
074        /**
075         * 
076         */
077        private short[][] matrix;
078
079        /**
080         * Minimal and maximal entry in this matrix
081         */
082        private short min, max;
083
084        /**
085         * The alphabet used by this matrix.
086         */
087        private FiniteAlphabet alphabet;
088
089        /**
090         * Name and description of this matrix.
091         */
092        private String description, name;
093
094        /**
095         * Just the new line symbol of the system.
096         */
097        private static final String newLine = System.getProperty("line.separator");
098
099        /**
100         * This constructs a <code>SubstitutionMatrix</code> object that contains
101         * two <code>Map</code> data structures having BioJava symbols as keys and
102         * the value being the index of the matrix containing the substitution
103         * score.
104         * 
105         * @param alpha
106         *            the alphabet of the matrix (e.g., DNA, RNA or PROTEIN, or
107         *            PROTEIN-TERM)
108         * @param matrixFile
109         *            the file containing the substitution matrix. Lines starting
110         *            with '<code>#</code>' are comments. The line starting with a
111         *            white space, is the table head. Every line has to start with
112         *            the one letter representation of the Symbol and then the
113         *            values for the exchange.
114         * @throws IOException
115         * @throws BioException
116         * @throws NumberFormatException
117         */
118        public SubstitutionMatrix(FiniteAlphabet alpha, File matrixFile)
119                        throws BioException, NumberFormatException, IOException {
120                this.alphabet = alpha;
121                this.description = "";
122                this.name = matrixFile.getName();
123                this.rowSymbols = new HashMap<Symbol, Integer>();
124                this.colSymbols = new HashMap<Symbol, Integer>();
125                this.matrix = this.parseMatrix(matrixFile);
126        }
127
128        /**
129         * With this constructor it is possible to construct a SubstitutionMatrix
130         * object from a substitution matrix file. The given String contains a
131         * number of lines separated by
132         * <code>System.getProperty("line.separator")</code>. Everything else is the
133         * same than for the constructor above.
134         * 
135         * @param alpha
136         *            The <code>FiniteAlphabet</code> to use
137         * @param matrixString
138         * @param name
139         *            of the matrix.
140         * @throws BioException
141         * @throws IOException
142         * @throws NumberFormatException
143         */
144        public SubstitutionMatrix(FiniteAlphabet alpha, String matrixString,
145                        String name) throws BioException, NumberFormatException,
146                        IOException {
147                this.alphabet = alpha;
148                this.description = "";
149                this.name = name;
150                this.rowSymbols = new HashMap<Symbol, Integer>();
151                this.colSymbols = new HashMap<Symbol, Integer>();
152                this.matrix = this.parseMatrix(matrixString);
153                // this.printMatrix();
154        }
155
156        /**
157         * Constructs a SubstitutionMatrix with every Match and every Replace having
158         * the same expenses given by the parameters. Ambiguous symbols are not
159         * considered because there might be to many of them (for proteins).
160         * 
161         * @param alpha
162         * @param match
163         * @param replace
164         */
165        public SubstitutionMatrix(FiniteAlphabet alpha, short match, short replace) {
166                int i = 0, j = 0;
167
168                this.alphabet = alpha;
169                this.description = "Identity matrix. All replaces and all matches are treated equally.";
170                this.name = "IDENTITY_" + match + "_" + replace;
171                this.rowSymbols = new HashMap<Symbol, Integer>();
172                this.colSymbols = new HashMap<Symbol, Integer>();
173                this.matrix = new short[alpha.size()][alpha.size()];
174
175                Symbol[] sym = new Symbol[alpha.size()];
176                Iterator<Symbol> iter = alpha.iterator();
177
178                for (i = 0; iter.hasNext(); i++) {
179                        sym[i] = iter.next();
180                        rowSymbols.put(sym[i], new Integer(i));
181                        colSymbols.put(sym[i], new Integer(i));
182                }
183
184                for (i = 0; i < alphabet.size(); i++)
185                        for (j = 0; j < alphabet.size(); j++)
186                                if (sym[i].getMatches().contains(sym[j]))
187                                        matrix[i][j] = match;
188                                else
189                                        matrix[i][j] = replace;
190
191                // this.printMatrix();
192        }
193
        /**
         * Constructs a <code>SubstitutionMatrix</code> from a file without an
         * explicitly given alphabet. The alphabet is obtained from
         * <code>guessAlphabet(File)</code> and then passed, together with the
         * file, to the constructor taking a <code>FiniteAlphabet</code> and a
         * <code>File</code>. It is recommended to apply another constructor if
         * the alphabet is known.
         * 
         * @param file
         *            a file containing a substitution matrix
         * @throws NumberFormatException
         *             propagated from guessing the alphabet or from the
         *             delegate constructor
         * @throws NoSuchElementException
         *             propagated from guessing the alphabet or from the
         *             delegate constructor
         * @throws BioException
         *             propagated from guessing the alphabet or from the
         *             delegate constructor
         * @throws IOException
         *             propagated from guessing the alphabet or from the
         *             delegate constructor
         */
        public SubstitutionMatrix(File file) throws NumberFormatException,
                        NoSuchElementException, BioException, IOException {
                this(guessAlphabet(file), file);
        }
210
211        /**
212         * This constructor can be used to guess the alphabet of this substitution
213         * matrix. However, it is recommended to apply another constructor if the
214         * alphabet is known.
215         * 
216         * @param reader
217         * @throws NumberFormatException
218         * @throws BioException
219         * @throws IOException
220         */
221        public static SubstitutionMatrix getSubstitutionMatrix(BufferedReader reader)
222                        throws NumberFormatException, BioException, IOException {
223                StringBuffer stringMatrix = new StringBuffer("");
224                while (reader.ready()) {
225                        stringMatrix.append(reader.readLine());
226                        stringMatrix.append(newLine);
227                }
228                reader.close();
229                String mat = stringMatrix.toString();
230                FiniteAlphabet alpha = guessAlphabet(new BufferedReader(
231                                new StringReader(mat)));
232                SubstitutionMatrix matrix = new SubstitutionMatrix(alpha, mat,
233                                "unknown");
234                return matrix;
235        }
236
237    /**
238     * Return a new substitution matrix with the specified alphabet.
239     *
240     * @param alphabet alphabet, must not be null
241     * @param reader reader, must not be null
242     * @return a new substitution matrix with the specified alphabet
243     * @throws BioException if an error occurs
244     * @throws IOException if an I/O error occurs
245     */
246    public static SubstitutionMatrix getSubstitutionMatrix(final FiniteAlphabet alphabet, final BufferedReader reader) throws BioException, IOException {
247        if (alphabet == null) {
248            throw new NullPointerException("alphabet must not be null");
249        }
250        if (reader == null) {
251            throw new NullPointerException("reader must not be null");
252        }
253        return new SubstitutionMatrix(alphabet, toString(reader), "unknown");
254    }
255
256    private static String toString(final BufferedReader reader) throws IOException {
257        StringBuilder sb = new StringBuilder();
258        try {
259            while (reader.ready()) {
260                String line = reader.readLine();
261                if (line == null) {
262                    break;
263                }
264                sb.append(line);
265                sb.append(newLine);
266            }
267            return sb.toString();
268        }
269        finally {
270            try {
271                reader.close();
272            }
273            catch (Exception e) {
274                // ignore
275            }
276        }
277    }
278
279    /**
280     * Return a new substitution matrix with the specified alphabet and name.
281     *
282     * @param alphabet alphabet, must not be null
283     * @param reader reader, must not be null
284     * @param name name, must not be null
285     * @return a new substitution matrix with the specified alphabet and name
286     * @throws BioException if an error occurs
287     * @throws IOException if an I/O error occurs
288     */
289    public static SubstitutionMatrix getSubstitutionMatrix(final FiniteAlphabet alphabet, final BufferedReader reader, final String name) throws BioException, IOException {
290        if (alphabet == null) {
291            throw new NullPointerException("alphabet must not be null");
292        }
293        if (reader == null) {
294            throw new NullPointerException("reader must not be null");
295        }
296        if (name == null) {
297            throw new NullPointerException("name must not be null");
298        }
299        return new SubstitutionMatrix(alphabet, toString(reader), name);
300    }
301
302    private static BufferedReader readResource(final String name) {
303        return new BufferedReader(new InputStreamReader(SubstitutionMatrix.class.getResourceAsStream(name)));
304    }
305
306    private static SubstitutionMatrix getNucleotideMatrix(final String name) {
307        try {
308            return getSubstitutionMatrix(DNATools.getDNA(), readResource(name), name);
309        }
310        catch (BioException e) {
311            throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e);
312        }
313        catch (IOException e) {
314            throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e);
315        }
316    }
317
318    private static SubstitutionMatrix getAminoAcidMatrix(final String name) {
319        try {
320            return getSubstitutionMatrix(ProteinTools.getTAlphabet(), readResource(name), name);
321        }
322        catch (BioException e) {
323            throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e);
324        }
325        catch (IOException e) {
326            throw new RuntimeException("could not load substitution matrix " + name + " from classpath", e);
327        }
328    }
329
330
331    /**
332     * Return the <code>BLOSUM100</code> amino acid substitution matrix.
333     *
334     * @return the <code>BLOSUM100</code> amino acid substitution matrix
335     */
336    public static SubstitutionMatrix getBlosum100() {
337        return getAminoAcidMatrix("BLOSUM100");
338    }
339
340    /**
341     * Return the <code>BLOSUM100.50</code> amino acid substitution matrix.
342     *
343     * @return the <code>BLOSUM100.50</code> amino acid substitution matrix
344     */
345    public static SubstitutionMatrix getBlosum100_50() {
346        return getAminoAcidMatrix("BLOSUM100.50");
347    }
348
349    /**
350     * Return the <code>BLOSUM30</code> amino acid substitution matrix.
351     *
352     * @return the <code>BLOSUM30</code> amino acid substitution matrix
353     */
354    public static SubstitutionMatrix getBlosum30() {
355        return getAminoAcidMatrix("BLOSUM30");
356    }
357
358    /**
359     * Return the <code>BLOSUM30.50</code> amino acid substitution matrix.
360     *
361     * @return the <code>BLOSUM30.50</code> amino acid substitution matrix
362     */
363    public static SubstitutionMatrix getBlosum30_50() {
364        return getAminoAcidMatrix("BLOSUM30.50");
365    }
366
367    /**
368     * Return the <code>BLOSUM35</code> amino acid substitution matrix.
369     *
370     * @return the <code>BLOSUM35</code> amino acid substitution matrix
371     */
372    public static SubstitutionMatrix getBlosum35() {
373        return getAminoAcidMatrix("BLOSUM35");
374    }
375
376    /**
377     * Return the <code>BLOSUM35.50</code> amino acid substitution matrix.
378     *
379     * @return the <code>BLOSUM35.50</code> amino acid substitution matrix
380     */
381    public static SubstitutionMatrix getBlosum35_50() {
382        return getAminoAcidMatrix("BLOSUM35.50");
383    }
384
385    /**
386     * Return the <code>BLOSUM40</code> amino acid substitution matrix.
387     *
388     * @return the <code>BLOSUM40</code> amino acid substitution matrix
389     */
390    public static SubstitutionMatrix getBlosum40() {
391        return getAminoAcidMatrix("BLOSUM40");
392    }
393
394    /**
395     * Return the <code>BLOSUM40.50</code> amino acid substitution matrix.
396     *
397     * @return the <code>BLOSUM40.50</code> amino acid substitution matrix
398     */
399    public static SubstitutionMatrix getBlosum40_50() {
400        return getAminoAcidMatrix("BLOSUM40.50");
401    }
402
403    /**
404     * Return the <code>BLOSUM45</code> amino acid substitution matrix.
405     *
406     * @return the <code>BLOSUM45</code> amino acid substitution matrix
407     */
408    public static SubstitutionMatrix getBlosum45() {
409        return getAminoAcidMatrix("BLOSUM45");
410    }
411
412    /**
413     * Return the <code>BLOSUM45.50</code> amino acid substitution matrix.
414     *
415     * @return the <code>BLOSUM45.50</code> amino acid substitution matrix
416     */
417    public static SubstitutionMatrix getBlosum45_50() {
418        return getAminoAcidMatrix("BLOSUM45.50");
419    }
420
421    /**
422     * Return the <code>BLOSUM50</code> amino acid substitution matrix.
423     *
424     * @return the <code>BLOSUM50</code> amino acid substitution matrix
425     */
426    public static SubstitutionMatrix getBlosum50() {
427        return getAminoAcidMatrix("BLOSUM50");
428    }
429
430    /**
431     * Return the <code>BLOSUM50.50</code> amino acid substitution matrix.
432     *
433     * @return the <code>BLOSUM50.50</code> amino acid substitution matrix
434     */
435    public static SubstitutionMatrix getBlosum50_50() {
436        return getAminoAcidMatrix("BLOSUM50.50");
437    }
438
439    /**
440     * Return the <code>BLOSUM55</code> amino acid substitution matrix.
441     *
442     * @return the <code>BLOSUM55</code> amino acid substitution matrix
443     */
444    public static SubstitutionMatrix getBlosum55() {
445        return getAminoAcidMatrix("BLOSUM55");
446    }
447
448    /**
449     * Return the <code>BLOSUM55.50</code> amino acid substitution matrix.
450     *
451     * @return the <code>BLOSUM55.50</code> amino acid substitution matrix
452     */
453    public static SubstitutionMatrix getBlosum55_50() {
454        return getAminoAcidMatrix("BLOSUM55.50");
455    }
456
457    /**
458     * Return the <code>BLOSUM60</code> amino acid substitution matrix.
459     *
460     * @return the <code>BLOSUM60</code> amino acid substitution matrix
461     */
462    public static SubstitutionMatrix getBlosum60() {
463        return getAminoAcidMatrix("BLOSUM60");
464    }
465
466    /**
467     * Return the <code>BLOSUM60.50</code> amino acid substitution matrix.
468     *
469     * @return the <code>BLOSUM60.50</code> amino acid substitution matrix
470     */
471    public static SubstitutionMatrix getBlosum60_50() {
472        return getAminoAcidMatrix("BLOSUM60.50");
473    }
474
475    /**
476     * Return the <code>BLOSUM62</code> amino acid substitution matrix.
477     *
478     * @return the <code>BLOSUM62</code> amino acid substitution matrix
479     */
480    public static SubstitutionMatrix getBlosum62() {
481        return getAminoAcidMatrix("BLOSUM62");
482    }
483
484    /**
485     * Return the <code>BLOSUM62.50</code> amino acid substitution matrix.
486     *
487     * @return the <code>BLOSUM62.50</code> amino acid substitution matrix
488     */
489    public static SubstitutionMatrix getBlosum62_50() {
490        return getAminoAcidMatrix("BLOSUM62.50");
491    }
492
493    /**
494     * Return the <code>BLOSUM65</code> amino acid substitution matrix.
495     *
496     * @return the <code>BLOSUM65</code> amino acid substitution matrix
497     */
498    public static SubstitutionMatrix getBlosum65() {
499        return getAminoAcidMatrix("BLOSUM65");
500    }
501
502    /**
503     * Return the <code>BLOSUM65.50</code> amino acid substitution matrix.
504     *
505     * @return the <code>BLOSUM65.50</code> amino acid substitution matrix
506     */
507    public static SubstitutionMatrix getBlosum65_50() {
508        return getAminoAcidMatrix("BLOSUM65.50");
509    }
510
511    /**
512     * Return the <code>BLOSUM70</code> amino acid substitution matrix.
513     *
514     * @return the <code>BLOSUM70</code> amino acid substitution matrix
515     */
516    public static SubstitutionMatrix getBlosum70() {
517        return getAminoAcidMatrix("BLOSUM70");
518    }
519
520    /**
521     * Return the <code>BLOSUM70.50</code> amino acid substitution matrix.
522     *
523     * @return the <code>BLOSUM70.50</code> amino acid substitution matrix
524     */
525    public static SubstitutionMatrix getBlosum70_50() {
526        return getAminoAcidMatrix("BLOSUM70.50");
527    }
528
529    /**
530     * Return the <code>BLOSUM75</code> amino acid substitution matrix.
531     *
532     * @return the <code>BLOSUM75</code> amino acid substitution matrix
533     */
534    public static SubstitutionMatrix getBlosum75() {
535        return getAminoAcidMatrix("BLOSUM75");
536    }
537
538    /**
539     * Return the <code>BLOSUM75.50</code> amino acid substitution matrix.
540     *
541     * @return the <code>BLOSUM75.50</code> amino acid substitution matrix
542     */
543    public static SubstitutionMatrix getBlosum75_50() {
544        return getAminoAcidMatrix("BLOSUM75.50");
545    }
546
547    /**
548     * Return the <code>BLOSUM80</code> amino acid substitution matrix.
549     *
550     * @return the <code>BLOSUM80</code> amino acid substitution matrix
551     */
552    public static SubstitutionMatrix getBlosum80() {
553        return getAminoAcidMatrix("BLOSUM80");
554    }
555
556    /**
557     * Return the <code>BLOSUM80.50</code> amino acid substitution matrix.
558     *
559     * @return the <code>BLOSUM80.50</code> amino acid substitution matrix
560     */
561    public static SubstitutionMatrix getBlosum80_50() {
562        return getAminoAcidMatrix("BLOSUM80.50");
563    }
564
565    /**
566     * Return the <code>BLOSUM85</code> amino acid substitution matrix.
567     *
568     * @return the <code>BLOSUM85</code> amino acid substitution matrix
569     */
570    public static SubstitutionMatrix getBlosum85() {
571        return getAminoAcidMatrix("BLOSUM85");
572    }
573
574    /**
575     * Return the <code>BLOSUM85.50</code> amino acid substitution matrix.
576     *
577     * @return the <code>BLOSUM85.50</code> amino acid substitution matrix
578     */
579    public static SubstitutionMatrix getBlosum85_50() {
580        return getAminoAcidMatrix("BLOSUM85.50");
581    }
582
583    /**
584     * Return the <code>BLOSUM90</code> amino acid substitution matrix.
585     *
586     * @return the <code>BLOSUM90</code> amino acid substitution matrix
587     */
588    public static SubstitutionMatrix getBlosum90() {
589        return getAminoAcidMatrix("BLOSUM90");
590    }
591
592    /**
593     * Return the <code>BLOSUM90.50</code> amino acid substitution matrix.
594     *
595     * @return the <code>BLOSUM90.50</code> amino acid substitution matrix
596     */
597    public static SubstitutionMatrix getBlosum90_50() {
598        return getAminoAcidMatrix("BLOSUM90.50");
599    }
600
601    /**
602     * Return the <code>BLOSUMN</code> amino acid substitution matrix.
603     *
604     * @return the <code>BLOSUMN</code> amino acid substitution matrix
605     */
606    public static SubstitutionMatrix getBlosumn() {
607        return getAminoAcidMatrix("BLOSUMN");
608    }
609
610    /**
611     * Return the <code>BLOSUMN.50</code> amino acid substitution matrix.
612     *
613     * @return the <code>BLOSUMN.50</code> amino acid substitution matrix
614     */
615    public static SubstitutionMatrix getBlosumn_50() {
616        return getAminoAcidMatrix("BLOSUMN.50");
617    }
618
619    /**
620     * Return the <code>DAYHOFF</code> amino acid substitution matrix.
621     *
622     * @return the <code>DAYHOFF</code> amino acid substitution matrix
623     */
624    public static SubstitutionMatrix getDayhoff() {
625        return getAminoAcidMatrix("DAYHOFF");
626    }
627
628    /**
629     * Return the <code>GONNET</code> amino acid substitution matrix.
630     *
631     * @return the <code>GONNET</code> amino acid substitution matrix
632     */
633    public static SubstitutionMatrix getGonnet() {
634        return getAminoAcidMatrix("GONNET");
635    }
636
637    /**
638     * Return the <code>IDENTITY</code> amino acid substitution matrix.
639     *
640     * @return the <code>IDENTITY</code> amino acid substitution matrix
641     */
642    public static SubstitutionMatrix getIdentity() {
643        return getAminoAcidMatrix("IDENTITY");
644    }
645
646    /**
647     * Return the <code>MATCH</code> amino acid substitution matrix.
648     *
649     * @return the <code>MATCH</code> amino acid substitution matrix
650     */
651    public static SubstitutionMatrix getMatch() {
652        return getAminoAcidMatrix("MATCH");
653    }
654
655    /**
656     * Return the <code>NUC.4.2</code> nucleotide substitution matrix.
657     *
658     * @return the <code>NUC.4.2</code> nucleotide substitution matrix
659     */
660    public static SubstitutionMatrix getNuc4_2() {
661        return getNucleotideMatrix("NUC.4.2");
662    }
663
664    /**
665     * Return the <code>NUC.4.4</code> nucleotide substitution matrix.
666     *
667     * @return the <code>NUC.4.4</code> nucleotide substitution matrix
668     */
669    public static SubstitutionMatrix getNuc4_4() {
670        return getNucleotideMatrix("NUC.4.4");
671    }
672
673    /**
674     * Return the <code>PAM10</code> amino acid substitution matrix.
675     *
676     * @return the <code>PAM10</code> amino acid substitution matrix
677     */
678    public static SubstitutionMatrix getPam10() {
679        return getAminoAcidMatrix("PAM10");
680    }
681
682    /**
683     * Return the <code>PAM100</code> amino acid substitution matrix.
684     *
685     * @return the <code>PAM100</code> amino acid substitution matrix
686     */
687    public static SubstitutionMatrix getPam100() {
688        return getAminoAcidMatrix("PAM100");
689    }
690
691    /**
692     * Return the <code>PAM110</code> amino acid substitution matrix.
693     *
694     * @return the <code>PAM110</code> amino acid substitution matrix
695     */
696    public static SubstitutionMatrix getPam110() {
697        return getAminoAcidMatrix("PAM110");
698    }
699
700    /**
701     * Return the <code>PAM120</code> amino acid substitution matrix.
702     *
703     * @return the <code>PAM120</code> amino acid substitution matrix
704     */
705    public static SubstitutionMatrix getPam120() {
706        return getAminoAcidMatrix("PAM120");
707    }
708
709    /**
710     * Return the <code>PAM130</code> amino acid substitution matrix.
711     *
712     * @return the <code>PAM130</code> amino acid substitution matrix
713     */
714    public static SubstitutionMatrix getPam130() {
715        return getAminoAcidMatrix("PAM130");
716    }
717
718    /**
719     * Return the <code>PAM140</code> amino acid substitution matrix.
720     *
721     * @return the <code>PAM140</code> amino acid substitution matrix
722     */
723    public static SubstitutionMatrix getPam140() {
724        return getAminoAcidMatrix("PAM140");
725    }
726
727    /**
728     * Return the <code>PAM150</code> amino acid substitution matrix.
729     *
730     * @return the <code>PAM150</code> amino acid substitution matrix
731     */
732    public static SubstitutionMatrix getPam150() {
733        return getAminoAcidMatrix("PAM150");
734    }
735
736    /**
737     * Return the <code>PAM160</code> amino acid substitution matrix.
738     *
739     * @return the <code>PAM160</code> amino acid substitution matrix
740     */
741    public static SubstitutionMatrix getPam160() {
742        return getAminoAcidMatrix("PAM160");
743    }
744
745    /**
746     * Return the <code>PAM170</code> amino acid substitution matrix.
747     *
748     * @return the <code>PAM170</code> amino acid substitution matrix
749     */
750    public static SubstitutionMatrix getPam170() {
751        return getAminoAcidMatrix("PAM170");
752    }
753
754    /**
755     * Return the <code>PAM180</code> amino acid substitution matrix.
756     *
757     * @return the <code>PAM180</code> amino acid substitution matrix
758     */
759    public static SubstitutionMatrix getPam180() {
760        return getAminoAcidMatrix("PAM180");
761    }
762
763    /**
764     * Return the <code>PAM190</code> amino acid substitution matrix.
765     *
766     * @return the <code>PAM190</code> amino acid substitution matrix
767     */
768    public static SubstitutionMatrix getPam190() {
769        return getAminoAcidMatrix("PAM190");
770    }
771
772    /**
773     * Return the <code>PAM20</code> amino acid substitution matrix.
774     *
775     * @return the <code>PAM20</code> amino acid substitution matrix
776     */
777    public static SubstitutionMatrix getPam20() {
778        return getAminoAcidMatrix("PAM20");
779    }
780
781    /**
782     * Return the <code>PAM200</code> amino acid substitution matrix.
783     *
784     * @return the <code>PAM200</code> amino acid substitution matrix
785     */
786    public static SubstitutionMatrix getPam200() {
787        return getAminoAcidMatrix("PAM200");
788    }
789
790    /**
791     * Return the <code>PAM210</code> amino acid substitution matrix.
792     *
793     * @return the <code>PAM210</code> amino acid substitution matrix
794     */
795    public static SubstitutionMatrix getPam210() {
796        return getAminoAcidMatrix("PAM210");
797    }
798
799    /**
800     * Return the <code>PAM220</code> amino acid substitution matrix.
801     *
802     * @return the <code>PAM220</code> amino acid substitution matrix
803     */
804    public static SubstitutionMatrix getPam220() {
805        return getAminoAcidMatrix("PAM220");
806    }
807
808    /**
809     * Return the <code>PAM230</code> amino acid substitution matrix.
810     *
811     * @return the <code>PAM230</code> amino acid substitution matrix
812     */
813    public static SubstitutionMatrix getPam230() {
814        return getAminoAcidMatrix("PAM230");
815    }
816
817    /**
818     * Return the <code>PAM240</code> amino acid substitution matrix.
819     *
820     * @return the <code>PAM240</code> amino acid substitution matrix
821     */
822    public static SubstitutionMatrix getPam240() {
823        return getAminoAcidMatrix("PAM240");
824    }
825
826    /**
827     * Return the <code>PAM250</code> amino acid substitution matrix.
828     *
829     * @return the <code>PAM250</code> amino acid substitution matrix
830     */
831    public static SubstitutionMatrix getPam250() {
832        return getAminoAcidMatrix("PAM250");
833    }
834
835    /**
836     * Return the <code>PAM260</code> amino acid substitution matrix.
837     *
838     * @return the <code>PAM260</code> amino acid substitution matrix
839     */
840    public static SubstitutionMatrix getPam260() {
841        return getAminoAcidMatrix("PAM260");
842    }
843
844    /**
845     * Return the <code>PAM270</code> amino acid substitution matrix.
846     *
847     * @return the <code>PAM270</code> amino acid substitution matrix
848     */
849    public static SubstitutionMatrix getPam270() {
850        return getAminoAcidMatrix("PAM270");
851    }
852
853    /**
854     * Return the <code>PAM280</code> amino acid substitution matrix.
855     *
856     * @return the <code>PAM280</code> amino acid substitution matrix
857     */
858    public static SubstitutionMatrix getPam280() {
859        return getAminoAcidMatrix("PAM280");
860    }
861
862    /**
863     * Return the <code>PAM290</code> amino acid substitution matrix.
864     *
865     * @return the <code>PAM290</code> amino acid substitution matrix
866     */
867    public static SubstitutionMatrix getPam290() {
868        return getAminoAcidMatrix("PAM290");
869    }
870
871    /**
872     * Return the <code>PAM30</code> amino acid substitution matrix.
873     *
874     * @return the <code>PAM30</code> amino acid substitution matrix
875     */
876    public static SubstitutionMatrix getPam30() {
877        return getAminoAcidMatrix("PAM30");
878    }
879
880    /**
881     * Return the <code>PAM300</code> amino acid substitution matrix.
882     *
883     * @return the <code>PAM300</code> amino acid substitution matrix
884     */
885    public static SubstitutionMatrix getPam300() {
886        return getAminoAcidMatrix("PAM300");
887    }
888
889    /**
890     * Return the <code>PAM310</code> amino acid substitution matrix.
891     *
892     * @return the <code>PAM310</code> amino acid substitution matrix
893     */
894    public static SubstitutionMatrix getPam310() {
895        return getAminoAcidMatrix("PAM310");
896    }
897
898    /**
899     * Return the <code>PAM320</code> amino acid substitution matrix.
900     *
901     * @return the <code>PAM320</code> amino acid substitution matrix
902     */
903    public static SubstitutionMatrix getPam320() {
904        return getAminoAcidMatrix("PAM320");
905    }
906
907    /**
908     * Return the <code>PAM330</code> amino acid substitution matrix.
909     *
910     * @return the <code>PAM330</code> amino acid substitution matrix
911     */
912    public static SubstitutionMatrix getPam330() {
913        return getAminoAcidMatrix("PAM330");
914    }
915
916    /**
917     * Return the <code>PAM340</code> amino acid substitution matrix.
918     *
919     * @return the <code>PAM340</code> amino acid substitution matrix
920     */
921    public static SubstitutionMatrix getPam340() {
922        return getAminoAcidMatrix("PAM340");
923    }
924
925    /**
926     * Return the <code>PAM350</code> amino acid substitution matrix.
927     *
928     * @return the <code>PAM350</code> amino acid substitution matrix
929     */
930    public static SubstitutionMatrix getPam350() {
931        return getAminoAcidMatrix("PAM350");
932    }
933
934    /**
935     * Return the <code>PAM360</code> amino acid substitution matrix.
936     *
937     * @return the <code>PAM360</code> amino acid substitution matrix
938     */
939    public static SubstitutionMatrix getPam360() {
940        return getAminoAcidMatrix("PAM360");
941    }
942
943    /**
944     * Return the <code>PAM370</code> amino acid substitution matrix.
945     *
946     * @return the <code>PAM370</code> amino acid substitution matrix
947     */
948    public static SubstitutionMatrix getPam370() {
949        return getAminoAcidMatrix("PAM370");
950    }
951
952    /**
953     * Return the <code>PAM380</code> amino acid substitution matrix.
954     *
955     * @return the <code>PAM380</code> amino acid substitution matrix
956     */
957    public static SubstitutionMatrix getPam380() {
958        return getAminoAcidMatrix("PAM380");
959    }
960
961    /**
962     * Return the <code>PAM390</code> amino acid substitution matrix.
963     *
964     * @return the <code>PAM390</code> amino acid substitution matrix
965     */
966    public static SubstitutionMatrix getPam390() {
967        return getAminoAcidMatrix("PAM390");
968    }
969
970    /**
971     * Return the <code>PAM40</code> amino acid substitution matrix.
972     *
973     * @return the <code>PAM40</code> amino acid substitution matrix
974     */
975    public static SubstitutionMatrix getPam40() {
976        return getAminoAcidMatrix("PAM40");
977    }
978
979    /**
980     * Return the <code>PAM400</code> amino acid substitution matrix.
981     *
982     * @return the <code>PAM400</code> amino acid substitution matrix
983     */
984    public static SubstitutionMatrix getPam400() {
985        return getAminoAcidMatrix("PAM400");
986    }
987
988    /**
989     * Return the <code>PAM410</code> amino acid substitution matrix.
990     *
991     * @return the <code>PAM410</code> amino acid substitution matrix
992     */
993    public static SubstitutionMatrix getPam410() {
994        return getAminoAcidMatrix("PAM410");
995    }
996
997    /**
998     * Return the <code>PAM420</code> amino acid substitution matrix.
999     *
1000     * @return the <code>PAM420</code> amino acid substitution matrix
1001     */
1002    public static SubstitutionMatrix getPam420() {
1003        return getAminoAcidMatrix("PAM420");
1004    }
1005
1006    /**
1007     * Return the <code>PAM430</code> amino acid substitution matrix.
1008     *
1009     * @return the <code>PAM430</code> amino acid substitution matrix
1010     */
1011    public static SubstitutionMatrix getPam430() {
1012        return getAminoAcidMatrix("PAM430");
1013    }
1014
1015    /**
1016     * Return the <code>PAM440</code> amino acid substitution matrix.
1017     *
1018     * @return the <code>PAM440</code> amino acid substitution matrix
1019     */
1020    public static SubstitutionMatrix getPam440() {
1021        return getAminoAcidMatrix("PAM440");
1022    }
1023
1024    /**
1025     * Return the <code>PAM450</code> amino acid substitution matrix.
1026     *
1027     * @return the <code>PAM450</code> amino acid substitution matrix
1028     */
1029    public static SubstitutionMatrix getPam450() {
1030        return getAminoAcidMatrix("PAM450");
1031    }
1032
1033    /**
1034     * Return the <code>PAM460</code> amino acid substitution matrix.
1035     *
1036     * @return the <code>PAM460</code> amino acid substitution matrix
1037     */
1038    public static SubstitutionMatrix getPam460() {
1039        return getAminoAcidMatrix("PAM460");
1040    }
1041
1042    /**
1043     * Return the <code>PAM470</code> amino acid substitution matrix.
1044     *
1045     * @return the <code>PAM470</code> amino acid substitution matrix
1046     */
1047    public static SubstitutionMatrix getPam470() {
1048        return getAminoAcidMatrix("PAM470");
1049    }
1050
1051    /**
1052     * Return the <code>PAM480</code> amino acid substitution matrix.
1053     *
1054     * @return the <code>PAM480</code> amino acid substitution matrix
1055     */
1056    public static SubstitutionMatrix getPam480() {
1057        return getAminoAcidMatrix("PAM480");
1058    }
1059
1060    /**
1061     * Return the <code>PAM490</code> amino acid substitution matrix.
1062     *
1063     * @return the <code>PAM490</code> amino acid substitution matrix
1064     */
1065    public static SubstitutionMatrix getPam490() {
1066        return getAminoAcidMatrix("PAM490");
1067    }
1068
1069    /**
1070     * Return the <code>PAM50</code> amino acid substitution matrix.
1071     *
1072     * @return the <code>PAM50</code> amino acid substitution matrix
1073     */
1074    public static SubstitutionMatrix getPam50() {
1075        return getAminoAcidMatrix("PAM50");
1076    }
1077
1078    /**
1079     * Return the <code>PAM500</code> amino acid substitution matrix.
1080     *
1081     * @return the <code>PAM500</code> amino acid substitution matrix
1082     */
1083    public static SubstitutionMatrix getPam500() {
1084        return getAminoAcidMatrix("PAM500");
1085    }
1086
1087    /**
1088     * Return the <code>PAM60</code> amino acid substitution matrix.
1089     *
1090     * @return the <code>PAM60</code> amino acid substitution matrix
1091     */
1092    public static SubstitutionMatrix getPam60() {
1093        return getAminoAcidMatrix("PAM60");
1094    }
1095
1096    /**
1097     * Return the <code>PAM70</code> amino acid substitution matrix.
1098     *
1099     * @return the <code>PAM70</code> amino acid substitution matrix
1100     */
1101    public static SubstitutionMatrix getPam70() {
1102        return getAminoAcidMatrix("PAM70");
1103    }
1104
1105    /**
1106     * Return the <code>PAM80</code> amino acid substitution matrix.
1107     *
1108     * @return the <code>PAM80</code> amino acid substitution matrix
1109     */
1110    public static SubstitutionMatrix getPam80() {
1111        return getAminoAcidMatrix("PAM80");
1112    }
1113
1114    /**
1115     * Return the <code>PAM90</code> amino acid substitution matrix.
1116     *
1117     * @return the <code>PAM90</code> amino acid substitution matrix
1118     */
1119    public static SubstitutionMatrix getPam90() {
1120        return getAminoAcidMatrix("PAM90");
1121    }
1122
1123        /**
1124         * This method tries to identify the alphabet within a matrix file. This is
1125         * necessary in cases where we do not know if this is a matrix for DNA, RNA
1126         * or PROTEIN/PROTEIN-TERM.
1127         * 
1128         * @param file
1129         * @return
1130         * @throws IOException
1131         * @throws BioException
1132         * @throws NoSuchElementException
1133         * @throws BioException
1134         */
1135        private static FiniteAlphabet guessAlphabet(File file) throws IOException,
1136                        NoSuchElementException, BioException {
1137                String fileName = file.getName().toLowerCase();
1138                if (fileName.contains("pam") || fileName.contains("blosum"))
1139                        return (FiniteAlphabet) AlphabetManager
1140                                        .alphabetForName("PROTEIN-TERM");
1141                return guessAlphabet(new BufferedReader(new FileReader(file)));
1142        }
1143
1144        /**
1145         * This method guesses the alphabet of the given substituttion matrix which
1146         * is required for the parser.
1147         * 
1148         * @param reader
1149         * @return
1150         * @throws IOException
1151         * @throws BioException
1152         */
1153        private static FiniteAlphabet guessAlphabet(BufferedReader reader)
1154                        throws IOException, BioException {
1155                String line, trim;
1156                FiniteAlphabet alphabet = null;
1157                while (reader.ready()) {
1158                        line = reader.readLine();
1159                        if (line == null)
1160                                break;
1161                        trim = line.trim();
1162                        if (trim.length()==0 || trim.charAt(0) == '#')
1163                                continue;
1164                        // Use line in next if-clause because trim will have lost leading important whitespace.
1165                        else if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) {
1166                                String alphabets[] = new String[] { "DNA", "RNA", "PROTEIN",
1167                                                "PROTEIN-TERM" };
1168                                SymbolTokenization symtok;
1169                                for (int i = 0; i < alphabets.length; i++) {
1170                                        alphabet = (FiniteAlphabet) AlphabetManager
1171                                                        .alphabetForName(alphabets[i]);
1172                                        symtok = alphabet.getTokenization("token");
1173                                        StringTokenizer st = new StringTokenizer(trim);
1174                                        boolean noError = true;
1175                                        for (int j = 0; st.hasMoreElements(); j++)
1176                                                try {
1177                                                        symtok.parseToken(st.nextElement().toString());
1178                                                } catch (IllegalSymbolException exc) {
1179                                                        noError = false;
1180                                                        break;
1181                                                }
1182                                        if (noError)
1183                                                return alphabet;
1184                                }
1185                        }
1186                }
1187                throw new BioException(
1188                                "Unknow alphabet used in this substitution matrix");
1189        }
1190
1191        /**
1192         * Reads a String representing the contents of a substitution matrix file.
1193         * 
1194         * @param matrixObj
1195         * @return matrix
1196         * @throws BioException
1197         * @throws IOException
1198         * @throws NumberFormatException
1199         */
1200        private short[][] parseMatrix(Object matrixObj) throws BioException,
1201                        NumberFormatException, IOException {
1202                int j = 0, rows = 0, cols = 0;
1203                SymbolTokenization symtok = alphabet.getTokenization("token");
1204                StringTokenizer st;
1205                String line, trim;
1206
1207                this.min = Short.MAX_VALUE;
1208                this.max = Short.MIN_VALUE;
1209                /*
1210                 * First: count how many elements are in the matrix fill lines and rows
1211                 */
1212                Reader reader;
1213                if (matrixObj instanceof File)
1214                        reader = new FileReader((File) matrixObj);
1215                else if (matrixObj instanceof String)
1216                        reader = new StringReader(matrixObj.toString());
1217                else
1218                        return null;
1219                BufferedReader br = new BufferedReader(reader);
1220
1221                while (br.ready()) {
1222                        line = br.readLine();
1223                        if (line == null)
1224                                break;
1225                        trim = line.trim();
1226                        if (trim.length() == 0)
1227                                continue;
1228                        if (trim.charAt(0) == '#') {
1229                                description += line.substring(1);
1230                                continue;
1231                        } 
1232                        // Use line in next if-clause because trim will have lost leading important whitespace.
1233                        else if (!line.startsWith(newLine)) {
1234                                if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) {
1235                                        st = new StringTokenizer(trim);
1236                                        for (j = 0; st.hasMoreElements(); j++) {
1237                                                colSymbols.put(symtok.parseToken(st.nextElement()
1238                                                                .toString()), Integer.valueOf(j));
1239                                        }
1240                                        cols = j;
1241                                } else {
1242                                        // the matrix.
1243                                        st = new StringTokenizer(trim);
1244                                        if (st.hasMoreElements())
1245                                                rowSymbols.put(symtok.parseToken(st.nextElement()
1246                                                                .toString()), Integer.valueOf(rows++));
1247                                }
1248                        }
1249                }
1250                br.close();
1251
1252                short[][] matrix = new short[rows][cols];
1253
1254                rows = 0;
1255                if (matrixObj instanceof File)
1256                        reader = new FileReader((File) matrixObj);
1257                else if (matrixObj instanceof String)
1258                        reader = new StringReader(matrixObj.toString());
1259                else
1260                        return null;
1261                br = new BufferedReader(reader);
1262
1263                /*
1264                 * Second reading. Fill the matrix.
1265                 */
1266                while (br.ready()) {
1267                        line = br.readLine();
1268                        if (line == null)
1269                                break;
1270                        trim = line.trim();
1271                        if (trim.length() == 0 || trim.charAt(0) == '#')
1272                                continue;
1273                        else if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t'))
1274                                continue;
1275                        // Use line in next if-clause because trim will have lost leading important whitespace.
1276                        else if (!line.startsWith(newLine)) { // lines:
1277                                st = new StringTokenizer(trim);
1278                                if (st.hasMoreElements())
1279                                        st.nextElement(); // throw away Symbol at
1280                                // beginning.
1281                                for (j = 0; st.hasMoreElements(); j++) {// cols:
1282                                        matrix[rows][j] = (short) Math.round(Double.parseDouble(st
1283                                                        .nextElement().toString()));
1284                                        if (matrix[rows][j] > max)
1285                                                max = matrix[rows][j]; // maximum.
1286                                        if (matrix[rows][j] < min)
1287                                                min = matrix[rows][j]; // minimum.
1288                                }
1289                                rows++;
1290                        }
1291                }
1292                br.close();
1293
1294                return matrix;
1295        }
1296
1297        /**
1298         * There are some substitution matrices containing more columns than lines.
1299         * This has to do with the ambiguous symbols. Lines are always good, columns
1300         * might not contain the whole information. The matrix is supposed to be
1301         * symmetric anyway, so you can always set the ambiguous symbol to be the
1302         * first argument.
1303         * 
1304         * @param row
1305         *            Symbol of the line
1306         * @param col
1307         *            Symbol of the column
1308         * @return expenses for the exchange of symbol row and symbol column.
1309         * @throws BioException
1310         */
1311        public short getValueAt(Symbol row, Symbol col) throws BioException {
1312                if ((!rowSymbols.containsKey(row)) || (!colSymbols.containsKey(col))) {
1313                        System.err.printf("SubstitutionMatrix: No entry for the symbols %s and %s\n",
1314                                        row.getName(), col.getName());
1315
1316                        // treat the two records as X:
1317                        return 0;
1318                }
1319                return matrix[rowSymbols.get(row).intValue()][colSymbols.get(col)
1320                                .intValue()];
1321        }
1322
1323        /**
1324         * This gives you the description of this matrix if there is one. Normally
1325         * substitution matrix files like BLOSUM contain some lines of description.
1326         * 
1327         * @return the comment of the matrix
1328         */
1329        public String getDescription() {
1330                return description;
1331        }
1332
1333        /**
1334         * Every substitution matrix has a name like "BLOSUM30" or "PAM160". This
1335         * will be returned by this method.
1336         * 
1337         * @return the name of the matrix.
1338         */
1339        public String getName() {
1340                return name;
1341        }
1342
1343        /**
1344         * The minimum score of this matrix.
1345         * 
1346         * @return minimum of the matrix.
1347         */
1348        public short getMin() {
1349                return min;
1350        }
1351
1352        /**
1353         * The maximum score in this matrix.
1354         * 
1355         * @return maximum of the matrix.
1356         */
1357        public short getMax() {
1358                return max;
1359        }
1360
1361        /**
1362         * Sets the description to the given value.
1363         * 
1364         * @param desc
1365         *            a description. This doesn't have to start with '#'.
1366         */
1367        public void setDescription(String desc) {
1368                this.description = desc;
1369        }
1370
1371        /**
1372         * Gives the alphabet used by this matrix.
1373         * 
1374         * @return the alphabet of this matrix.
1375         */
1376        public FiniteAlphabet getAlphabet() {
1377                return alphabet;
1378        }
1379
1380        /**
1381         * Creates a <code>String</code> representation of this matrix.
1382         * 
1383         * @return a string representation of this matrix without the description.
1384         */
1385        public String stringnifyMatrix() {
1386                int i = 0;
1387                StringBuffer matrixString = new StringBuffer();
1388                Symbol[] colSyms = new Symbol[this.colSymbols.keySet().size()];
1389
1390                try {
1391                        SymbolTokenization symtok = alphabet.getTokenization("default");
1392                        matrixString.append("  ");
1393                        Iterator<Symbol> colKeys = colSymbols.keySet().iterator();
1394                        while (colKeys.hasNext()) {
1395                                colSyms[i] = colKeys.next();
1396                                matrixString.append(symtok.tokenizeSymbol(colSyms[i++])
1397                                                .toUpperCase());
1398                                matrixString.append(' ');
1399                        }
1400                        matrixString.append(newLine);
1401
1402                        Iterator<Symbol> rowKeys = rowSymbols.keySet().iterator();
1403                        while (rowKeys.hasNext()) {
1404                                Symbol rowSym = rowKeys.next();
1405                                matrixString
1406                                                .append(symtok.tokenizeSymbol(rowSym).toUpperCase());
1407                                matrixString.append(' ');
1408                                for (i = 0; i < colSyms.length; i++) {
1409                                        matrixString.append(getValueAt(rowSym, colSyms[i]));
1410                                        matrixString.append(' ');
1411                                }
1412                                matrixString.append(newLine);
1413                        }
1414                } catch (BioException exc) {
1415                        exc.printStackTrace();
1416                }
1417                return matrixString.toString();
1418        }
1419
1420        /**
1421         * Converts the description of the matrix to a String.
1422         * 
1423         * @return Gives a description with approximately 60 letters on every line
1424         *         separated by <code>System.getProperty("line.separator")</code>.
1425         *         Every line starts with <code>#</code>.
1426         */
1427        public String stringnifyDescription() {
1428                StringBuffer desc = new StringBuffer(), line = new StringBuffer();
1429                line.append("# ");
1430                StringTokenizer st = new StringTokenizer(description, " ");
1431                while (st.hasMoreElements()) {
1432                        line.append(st.nextElement().toString());
1433                        line.append(' ');
1434                        if (line.length() >= 60) {
1435                                desc.append(line);
1436                                desc.append(newLine);
1437                                if (st.hasMoreElements()) {
1438                                        line = new StringBuffer();
1439                                        line.append("# ");
1440                                }
1441                        } else if (!st.hasMoreElements()) {
1442                                desc.append(line);
1443                                desc.append(newLine);
1444                        }
1445                }
1446                return desc.toString();
1447        }
1448
1449        /**
1450         * Overrides the inherited method.
1451         * 
1452         * @return Gives a string representation of the SubstitutionMatrix. This is
1453         *         a valid input for the constructor which needs a matrix string.
1454         *         This String also contains the description of the matrix if there
1455         *         is one.
1456         */
1457        @Override
1458        public String toString() {
1459                StringBuffer desc = new StringBuffer(), line = new StringBuffer();
1460                line.append("# ");
1461                StringTokenizer st = new StringTokenizer(description);
1462                while (st.hasMoreElements()) {
1463                        line.append(st.nextElement().toString());
1464                        line.append(' ');
1465                        if (line.length() >= 60) {
1466                                desc.append(line);
1467                                desc.append(newLine);
1468                                if (st.hasMoreElements()) {
1469                                        line = new StringBuffer();
1470                                        line.append("# ");
1471                                }
1472                        } else if (!st.hasMoreElements()) {
1473                                desc.append(line);
1474                                desc.append(newLine);
1475                        }
1476                }
1477                desc.append(stringnifyMatrix());
1478                return desc.toString();
1479        }
1480
1481        /**
1482         * Just to perform some test. It prints the matrix on the screen.
1483         */
1484        public void printMatrix() {
1485                // Test output:
1486                Iterator<Symbol> rowKeys = rowSymbols.keySet().iterator();
1487                while (rowKeys.hasNext()) {
1488                        Iterator<Symbol> colKeys = colSymbols.keySet().iterator();
1489                        Symbol rowSym = rowKeys.next();
1490                        System.out.print(rowSym.getName() + "\t");
1491                        while (colKeys.hasNext()) {
1492                                Symbol colSym = colKeys.next();
1493                                int x = rowSymbols.get(rowSym).intValue();
1494                                int y = colSymbols.get(colSym).intValue();
1495                                System.out.print(colSym.getName() + " " + " " + x + " " + y
1496                                                + " " + matrix[x][y] + "\t");
1497                        }
1498                        System.out.println(newLine);
1499                }
1500                System.out.println(toString());
1501        }
1502
1503        /**
1504         * With this method you can get a &ldquo;normalized&rdquo;
1505         * <code>SubstitutionMatrix</code> object; however, since this
1506         * implementation uses an short matrix, the normalized matrix will be scaled
1507         * by ten. If you need values between zero and one, you have to divide every
1508         * value returned by <code>getValueAt</code> by ten.
1509         * 
1510         * @return a new and normalized <code>SubstitutionMatrix</code> object given
1511         *         by this substitution matrix. Because this uses an
1512         *         <code>short</code> matrix, all values are scaled by 10.
1513         * @throws BioException
1514         * @throws IOException
1515         * @throws NumberFormatException
1516         */
1517        public SubstitutionMatrix normalizeMatrix() throws BioException,
1518                        NumberFormatException, IOException {
1519                int i, j;
1520                short min = getMin(), newMax = Short.MIN_VALUE;
1521                short[][] mat = new short[matrix.length][matrix[matrix.length - 1].length];
1522                String name = getName() + "_normalized";
1523                String matString = stringnifyDescription() + "  ";
1524                FiniteAlphabet alphabet = getAlphabet();
1525                Map<Symbol, Integer> rowMap = this.rowSymbols;
1526                Map<Symbol, Integer> colMap = this.colSymbols;
1527                SymbolTokenization symtok = alphabet.getTokenization("default");
1528
1529                for (i = 0; i < matrix.length; i++)
1530                        for (j = 0; j < matrix[matrix.length - 1].length; j++) {
1531                                mat[i][j] = (short) (matrix[i][j] - min);
1532                                if (mat[i][j] > newMax)
1533                                        newMax = mat[i][j];
1534                        }
1535
1536                for (i = 0; i < mat.length; i++)
1537                        for (j = 0; j < mat[mat.length - 1].length; j++)
1538                                mat[i][j] = (short) (mat[i][j] * 10 / newMax);
1539
1540                Object[] rows = rowSymbols.keySet().toArray();
1541                Object[] cols = colSymbols.keySet().toArray();
1542                for (i = 0; i < cols.length; i++)
1543                        matString += symtok.tokenizeSymbol((Symbol) cols[i]) + " ";
1544                for (i = 0; i < rows.length; i++) {
1545                        matString += newLine + symtok.tokenizeSymbol((Symbol) rows[i])
1546                                        + " ";
1547                        for (j = 0; j < cols.length; j++) {
1548                                matString += mat[rowMap.get((Symbol) rows[i]).intValue()][colMap
1549                                                .get((Symbol) cols[j]).intValue()]
1550                                                + " ";
1551                        }
1552                }
1553                matString += newLine;
1554                return new SubstitutionMatrix(alphabet, matString, name);
1555        }
1556
1557}