001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on July 26, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.core.alignment.matrices;
025
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
036
037/**
038 * Static utility to access substitution matrices that come bundled with BioJava.  All matrices were downloaded from
039 * ftp://ftp.ncbi.nih.gov/blast/matrices/
040 *
041 * @author Mark Chapman
042 * @author Paolo Pavan
043 */
044public class SubstitutionMatrixHelper implements Serializable {
045
046        /**
047         *
048         */
049        private static final long serialVersionUID = 148491724604653225L;
050
051        private static Map<String, SubstitutionMatrix<AminoAcidCompound>> aminoAcidMatrices =
052                        new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
053        private static Map<String, SubstitutionMatrix<NucleotideCompound>> nucleotideMatrices =
054                        new HashMap<String, SubstitutionMatrix<NucleotideCompound>>();
055
056        // prevents instantiation
057        private SubstitutionMatrixHelper() { }
058
059
060        /** Returns any matrix from the AAINDEX database file
061         *
062         * @param matrixName
063         * @return a {@link SubstitutionMatrix}
064         */
065        public static SubstitutionMatrix<AminoAcidCompound> getMatrixFromAAINDEX(String matrixName){
066
067                return AAindexFactory.getAAIndexProvider().getMatrix(matrixName);
068
069        }
070
071
072        public static SubstitutionMatrix<AminoAcidCompound> getIdentity() {
073                return getAminoAcidMatrix("identity");
074        }
075
076        /**
077         * Returns Blosum 100 matrix by Henikoff & Henikoff
078         * @return Blosum 100 matrix
079         */
080        public static SubstitutionMatrix<AminoAcidCompound> getBlosum100() {
081                return getAminoAcidMatrix("blosum100");
082        }
083
084        /**
085         * Returns Blosum 30 matrix by Henikoff & Henikoff
086         * @return Blosum 30 matrix
087         */
088        public static SubstitutionMatrix<AminoAcidCompound> getBlosum30() {
089                return getAminoAcidMatrix("blosum30");
090        }
091
092        /**
093         * Returns Blosum 35 matrix by Henikoff & Henikoff
094         * @return Blosum 35 matrix
095         */
096        public static SubstitutionMatrix<AminoAcidCompound> getBlosum35() {
097                return getAminoAcidMatrix("blosum35");
098        }
099
100        /**
101         * Returns Blosum 40 matrix by Henikoff & Henikoff
102         * @return Blosum 40 matrix
103         */
104        public static SubstitutionMatrix<AminoAcidCompound> getBlosum40() {
105                return getAminoAcidMatrix("blosum40");
106        }
107
108        /**
109         * Returns Blosum 45 matrix by Henikoff & Henikoff
110         * @return Blosum 45 matrix
111         */
112        public static SubstitutionMatrix<AminoAcidCompound> getBlosum45() {
113                return getAminoAcidMatrix("blosum45");
114        }
115
116        /**
117         * Returns Blosum 50 matrix by Henikoff & Henikoff
118         * @return Blosum 50 matrix
119         */
120        public static SubstitutionMatrix<AminoAcidCompound> getBlosum50() {
121                return getAminoAcidMatrix("blosum50");
122        }
123
124        /**
125         * Returns Blosum 55 matrix by Henikoff & Henikoff
126         * @return Blosum 55 matrix
127         */
128        public static SubstitutionMatrix<AminoAcidCompound> getBlosum55() {
129                return getAminoAcidMatrix("blosum55");
130        }
131
132        /**
133         * Returns Blosum 60 matrix by Henikoff & Henikoff
134         * @return Blosum 60 matrix
135         */
136        public static SubstitutionMatrix<AminoAcidCompound> getBlosum60() {
137                return getAminoAcidMatrix("blosum60");
138        }
139
140        /**
141         * Returns Blosum 62 matrix by Henikoff & Henikoff
142         * @return Blosum 62 matrix
143         */
144        public static SubstitutionMatrix<AminoAcidCompound> getBlosum62() {
145                return getAminoAcidMatrix("blosum62");
146        }
147
148        /**
149         * Returns Blosum 65 matrix by Henikoff & Henikoff
150         * @return Blosum 65 matrix
151         */
152        public static SubstitutionMatrix<AminoAcidCompound> getBlosum65() {
153                return getAminoAcidMatrix("blosum65");
154        }
155
156        /**
157         * Returns Blosum 70 matrix by Henikoff & Henikoff
158         * @return Blosum 70 matrix
159         */
160        public static SubstitutionMatrix<AminoAcidCompound> getBlosum70() {
161                return getAminoAcidMatrix("blosum70");
162        }
163
164        /**
165         * Returns Blosum 75 matrix by Henikoff & Henikoff
166         * @return Blosum 75 matrix
167         */
168        public static SubstitutionMatrix<AminoAcidCompound> getBlosum75() {
169                return getAminoAcidMatrix("blosum75");
170        }
171
172        /**
173         * Returns Blosum 80 matrix by Henikoff & Henikoff
174         * @return Blosum 80 matrix
175         */
176        public static SubstitutionMatrix<AminoAcidCompound> getBlosum80() {
177                return getAminoAcidMatrix("blosum80");
178        }
179
180        /**
181         * Returns Blosum 85 matrix by Henikoff & Henikoff
182         * @return Blosum 85 matrix
183         */
184        public static SubstitutionMatrix<AminoAcidCompound> getBlosum85() {
185                return getAminoAcidMatrix("blosum85");
186        }
187
188        /**
189         * Returns Blosum 90 matrix by Henikoff & Henikoff
190         * @return Blosum 90 matrix
191         */
192        public static SubstitutionMatrix<AminoAcidCompound> getBlosum90() {
193                return getAminoAcidMatrix("blosum90");
194        }
195
196        /**
197         * Returns PAM 250 matrix by Gonnet, Cohen & Benner
198         * @return Gonnet 250 matrix
199         */
200        public static SubstitutionMatrix<AminoAcidCompound> getGonnet250() {
201                return getAminoAcidMatrix("gonnet250");
202        }
203
204        /**
205         * Returns Nuc 4.2 matrix by Lowe
206         * Only the first nucleotide sequence to align can contain ambiguous nucleotides
207         * @return Nuc 4.2 matrix
208         */
209        public static SubstitutionMatrix<NucleotideCompound> getNuc4_2() {
210                return getNucleotideMatrix("nuc-4_2");
211        }
212
213        /**
214         * Returns Nuc 4.4 matrix by Lowe
215         * Both of the nucleotide sequences to align can contain ambiguous nucleotides
216         * @return Nuc 4.4 matrix
217         */
218        public static SubstitutionMatrix<NucleotideCompound> getNuc4_4() {
219                return getNucleotideMatrix("nuc-4_4");
220        }
221
222        /**
223         * Returns PAM 250 matrix by Dayhoff
224         * @return PAM 250 matrix
225         */
226        public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
227                return getAminoAcidMatrix("pam250");
228        }
229
230        // helper methods
231
232        /**
233         * Returns a substitution matrix for {@link AminoAcidCompound amino acids} given by the name {@code name}.
234         * Searches first in the default AAINDEX file (see @link {@link #getMatrixFromAAINDEX(String)}), then in the classpath.
235         * If the required matrix does not exist, null is returned.
236         * Example names:
237         * <ul>
238         * <li>blosum62</li>
239         * <li>JOND920103</li>
240         * <li>pam250</li>
241         * <li>gonnet250</li>
242         * </ul>
243         * @param name Either a common name or an AAINDEX name
244         */
245        public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatrix(String name) {
246                SubstitutionMatrix<AminoAcidCompound> matrix = getMatrixFromAAINDEX(name);
247                if (matrix != null) return matrix;
248                return getAminoAcidMatrix(name);
249        }
250
251        // reads in an amino acid substitution matrix, if necessary
252        private static SubstitutionMatrix<AminoAcidCompound> getAminoAcidMatrix(String file) {
253                if (!aminoAcidMatrices.containsKey(file)) {
254                        aminoAcidMatrices.put(file, new SimpleSubstitutionMatrix<AminoAcidCompound>(
255                                        AminoAcidCompoundSet.getAminoAcidCompoundSet(), getReader(file), file));
256                }
257                return aminoAcidMatrices.get(file);
258        }
259
260        // reads in a nucleotide substitution matrix, if necessary
261        private static SubstitutionMatrix<NucleotideCompound> getNucleotideMatrix(String file) {
262                if (!nucleotideMatrices.containsKey(file)) {
263                        nucleotideMatrices.put(file, new SimpleSubstitutionMatrix<NucleotideCompound>(
264                                        AmbiguityDNACompoundSet.getDNACompoundSet(), getReader(file), file));
265                }
266                return nucleotideMatrices.get(file);
267        }
268
269        // reads in a substitution matrix from a resource file
270        private static InputStreamReader getReader(String file) {
271                String resourcePathPrefix = "matrices/";
272                return new InputStreamReader(SubstitutionMatrixHelper.class.getResourceAsStream(String.format("/%s.txt",
273                                resourcePathPrefix+file)));
274        }
275
276}