001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on July 26, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava.nbio.core.alignment.matrices;
025
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
037
038/**
039 * Static utility to access substitution matrices that come bundled with BioJava.  All matrices were downloaded from
040 * ftp://ftp.ncbi.nih.gov/blast/matrices/
041 *
042 * @author Mark Chapman
043 * @author Paolo Pavan
044 */
045public class SubstitutionMatrixHelper implements Serializable {
046
047        /**
048         *
049         */
050        private static final long serialVersionUID = 148491724604653225L;
051
052        private static Map<String, SubstitutionMatrix<AminoAcidCompound>> aminoAcidMatrices =
053                        new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
054        private static Map<String, SubstitutionMatrix<NucleotideCompound>> nucleotideMatrices =
055                        new HashMap<String, SubstitutionMatrix<NucleotideCompound>>();
056
057        // prevents instantiation
058        private SubstitutionMatrixHelper() { }
059
060
061        /** Returns any matrix from the AAINDEX database file
062         *
063         * @param matrixName
064         * @return a {@link SubstitutionMatrix}
065         */
066        public static SubstitutionMatrix<AminoAcidCompound> getMatrixFromAAINDEX(String matrixName){
067
068                return AAindexFactory.getAAIndexProvider().getMatrix(matrixName);
069
070        }
071
072        /**
073         * Gets identity matrix where matches score 1 and mismatches score -10000
074         * @return
075         */
076        public static SubstitutionMatrix<AminoAcidCompound> getIdentity() {
077                return getAminoAcidMatrix("identity");
078        }
079
080        /**
081         * Returns Blosum 100 matrix by Henikoff & Henikoff
082         * @return Blosum 100 matrix
083         */
084        public static SubstitutionMatrix<AminoAcidCompound> getBlosum100() {
085                return getAminoAcidMatrix("blosum100");
086        }
087
088        /**
089         * Returns Blosum 30 matrix by Henikoff & Henikoff
090         * @return Blosum 30 matrix
091         */
092        public static SubstitutionMatrix<AminoAcidCompound> getBlosum30() {
093                return getAminoAcidMatrix("blosum30");
094        }
095
096        /**
097         * Returns Blosum 35 matrix by Henikoff & Henikoff
098         * @return Blosum 35 matrix
099         */
100        public static SubstitutionMatrix<AminoAcidCompound> getBlosum35() {
101                return getAminoAcidMatrix("blosum35");
102        }
103
104        /**
105         * Returns Blosum 40 matrix by Henikoff & Henikoff
106         * @return Blosum 40 matrix
107         */
108        public static SubstitutionMatrix<AminoAcidCompound> getBlosum40() {
109                return getAminoAcidMatrix("blosum40");
110        }
111
112        /**
113         * Returns Blosum 45 matrix by Henikoff & Henikoff
114         * @return Blosum 45 matrix
115         */
116        public static SubstitutionMatrix<AminoAcidCompound> getBlosum45() {
117                return getAminoAcidMatrix("blosum45");
118        }
119
120        /**
121         * Returns Blosum 50 matrix by Henikoff & Henikoff
122         * @return Blosum 50 matrix
123         */
124        public static SubstitutionMatrix<AminoAcidCompound> getBlosum50() {
125                return getAminoAcidMatrix("blosum50");
126        }
127
128        /**
129         * Returns Blosum 55 matrix by Henikoff & Henikoff
130         * @return Blosum 55 matrix
131         */
132        public static SubstitutionMatrix<AminoAcidCompound> getBlosum55() {
133                return getAminoAcidMatrix("blosum55");
134        }
135
136        /**
137         * Returns Blosum 60 matrix by Henikoff & Henikoff
138         * @return Blosum 60 matrix
139         */
140        public static SubstitutionMatrix<AminoAcidCompound> getBlosum60() {
141                return getAminoAcidMatrix("blosum60");
142        }
143
144        /**
145         * Returns Blosum 62 matrix by Henikoff & Henikoff
146         * @return Blosum 62 matrix
147         */
148        public static SubstitutionMatrix<AminoAcidCompound> getBlosum62() {
149                return getAminoAcidMatrix("blosum62");
150        }
151
152        /**
153         * Returns Blosum 65 matrix by Henikoff & Henikoff
154         * @return Blosum 65 matrix
155         */
156        public static SubstitutionMatrix<AminoAcidCompound> getBlosum65() {
157                return getAminoAcidMatrix("blosum65");
158        }
159
160        /**
161         * Returns Blosum 70 matrix by Henikoff & Henikoff
162         * @return Blosum 70 matrix
163         */
164        public static SubstitutionMatrix<AminoAcidCompound> getBlosum70() {
165                return getAminoAcidMatrix("blosum70");
166        }
167
168        /**
169         * Returns Blosum 75 matrix by Henikoff & Henikoff
170         * @return Blosum 75 matrix
171         */
172        public static SubstitutionMatrix<AminoAcidCompound> getBlosum75() {
173                return getAminoAcidMatrix("blosum75");
174        }
175
176        /**
177         * Returns Blosum 80 matrix by Henikoff & Henikoff
178         * @return Blosum 80 matrix
179         */
180        public static SubstitutionMatrix<AminoAcidCompound> getBlosum80() {
181                return getAminoAcidMatrix("blosum80");
182        }
183
184        /**
185         * Returns Blosum 85 matrix by Henikoff & Henikoff
186         * @return Blosum 85 matrix
187         */
188        public static SubstitutionMatrix<AminoAcidCompound> getBlosum85() {
189                return getAminoAcidMatrix("blosum85");
190        }
191
192        /**
193         * Returns Blosum 90 matrix by Henikoff & Henikoff
194         * @return Blosum 90 matrix
195         */
196        public static SubstitutionMatrix<AminoAcidCompound> getBlosum90() {
197                return getAminoAcidMatrix("blosum90");
198        }
199
200        /**
201         * Returns PAM 250 matrix by Gonnet, Cohen & Benner
202         * @return Gonnet 250 matrix
203         */
204        public static SubstitutionMatrix<AminoAcidCompound> getGonnet250() {
205                return getAminoAcidMatrix("gonnet250");
206        }
207
208        /**
209         * Returns Nuc 4.2 matrix by Lowe
210         * Only the first nucleotide sequence to align can contain ambiguous nucleotides
211         * @return Nuc 4.2 matrix
212         */
213        public static SubstitutionMatrix<NucleotideCompound> getNuc4_2() {
214                return getNucleotideMatrix("nuc-4_2");
215        }
216
217        /**
218         * Returns Nuc 4.4 matrix by Lowe
219         * Both of the nucleotide sequences to align can contain ambiguous nucleotides
220         * @return Nuc 4.4 matrix
221         */
222        public static SubstitutionMatrix<NucleotideCompound> getNuc4_4() {
223                return getNucleotideMatrix("nuc-4_4");
224        }
225
226        /**
227         * Returns PAM 250 matrix by Dayhoff
228         * @return PAM 250 matrix
229         */
230        public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
231                return getAminoAcidMatrix("pam250");
232        }
233
234        // helper methods
235
236        /**
237         * Returns a substitution matrix for {@link AminoAcidCompound amino acids} given by the name {@code name}.
238         * Searches first in the default AAINDEX file (see @link {@link #getMatrixFromAAINDEX(String)}), then in the classpath
239         * in src/main/resources/matrices.
240         * Example names:
241         * <ul>
242         * <li>blosum62</li>
243         * <li>JOND920103</li>
244         * <li>pam250</li>
245         * <li>gonnet250</li>
246         * </ul>
247         * @param name Either a common name or an AAINDEX name
248         * @return a {@code} SubstitutionMatrix {@code} or {@code}null{@code} if no matrix is found
249         */
250        public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatrix(String name) {
251                SubstitutionMatrix<AminoAcidCompound> matrix = getMatrixFromAAINDEX(name);
252                if (matrix != null) return matrix;
253                return getAminoAcidMatrix(name);
254        }
255
256        // reads in an amino acid substitution matrix, if necessary
257        private static SubstitutionMatrix<AminoAcidCompound> getAminoAcidMatrix(String file) {
258                if (!aminoAcidMatrices.containsKey(file)) {
259                        InputStreamReader reader = getReader(file);
260                        if (reader == null) {
261                                return null;
262                        }
263                        aminoAcidMatrices.put(file, new SimpleSubstitutionMatrix<AminoAcidCompound>(
264                                        AminoAcidCompoundSet.getAminoAcidCompoundSet(), reader , file));
265                }
266                return aminoAcidMatrices.get(file);
267        }
268
269        // reads in a nucleotide substitution matrix, if necessary
270        private static SubstitutionMatrix<NucleotideCompound> getNucleotideMatrix(String file) {
271                if (!nucleotideMatrices.containsKey(file)) {
272                        nucleotideMatrices.put(file, new SimpleSubstitutionMatrix<NucleotideCompound>(
273                                        AmbiguityDNACompoundSet.getDNACompoundSet(), getReader(file), file));
274                }
275                return nucleotideMatrices.get(file);
276        }
277
278        // reads in a substitution matrix from a resource file
279        private static InputStreamReader getReader(String file) {
280                String resourcePathPrefix = "matrices/";
281                InputStream is = SubstitutionMatrixHelper.class.getResourceAsStream(String.format("/%s.txt",
282                resourcePathPrefix+file));
283                if (is == null) {
284                        return null;
285                }
286                return new InputStreamReader(is);
287        }
288
289}