/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on July 26, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.core.alignment.matrices;

import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;

/**
 * Static utility to access substitution matrices that come bundled with BioJava.  All matrices were downloaded from
 * ftp://ftp.ncbi.nih.gov/blast/matrices/
 * <p>
 * Matrices loaded from the classpath are cached in static maps keyed by resource name, so each resource is parsed
 * at most once per successful load. The caches are plain {@link HashMap}s and are not synchronized.
 *
 * @author Mark Chapman
 * @author Paolo Pavan
 */
public class SubstitutionMatrixHelper implements Serializable {

	private static final long serialVersionUID = 148491724604653225L;

	// Charset used to decode the bundled matrix resources. Stated explicitly so that
	// parsing does not depend on the platform default charset.
	private static final Charset MATRIX_CHARSET = Charset.forName("UTF-8");

	// caches of already parsed matrices, keyed by resource name (e.g. "blosum62")
	private static final Map<String, SubstitutionMatrix<AminoAcidCompound>> aminoAcidMatrices =
			new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
	private static final Map<String, SubstitutionMatrix<NucleotideCompound>> nucleotideMatrices =
			new HashMap<String, SubstitutionMatrix<NucleotideCompound>>();

	// prevents instantiation
	private SubstitutionMatrixHelper() { }


	/**
	 * Returns any matrix from the AAINDEX database file.
	 * The lookup is delegated to the provider returned by {@code AAindexFactory.getAAIndexProvider()}.
	 *
	 * @param matrixName the AAINDEX name of the matrix
	 * @return a {@link SubstitutionMatrix}
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getMatrixFromAAINDEX(String matrixName){

		return AAindexFactory.getAAIndexProvider().getMatrix(matrixName);

	}


	/**
	 * Returns the bundled amino acid matrix stored under the resource name "identity".
	 * @return identity matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getIdentity() {
		return getAminoAcidMatrix("identity");
	}

	/**
	 * Returns Blosum 100 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 100 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum100() {
		return getAminoAcidMatrix("blosum100");
	}

	/**
	 * Returns Blosum 30 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 30 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum30() {
		return getAminoAcidMatrix("blosum30");
	}

	/**
	 * Returns Blosum 35 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 35 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum35() {
		return getAminoAcidMatrix("blosum35");
	}

	/**
	 * Returns Blosum 40 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 40 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum40() {
		return getAminoAcidMatrix("blosum40");
	}

	/**
	 * Returns Blosum 45 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 45 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum45() {
		return getAminoAcidMatrix("blosum45");
	}

	/**
	 * Returns Blosum 50 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 50 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum50() {
		return getAminoAcidMatrix("blosum50");
	}

	/**
	 * Returns Blosum 55 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 55 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum55() {
		return getAminoAcidMatrix("blosum55");
	}

	/**
	 * Returns Blosum 60 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 60 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum60() {
		return getAminoAcidMatrix("blosum60");
	}

	/**
	 * Returns Blosum 62 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 62 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum62() {
		return getAminoAcidMatrix("blosum62");
	}

	/**
	 * Returns Blosum 65 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 65 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum65() {
		return getAminoAcidMatrix("blosum65");
	}

	/**
	 * Returns Blosum 70 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 70 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum70() {
		return getAminoAcidMatrix("blosum70");
	}

	/**
	 * Returns Blosum 75 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 75 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum75() {
		return getAminoAcidMatrix("blosum75");
	}

	/**
	 * Returns Blosum 80 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 80 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum80() {
		return getAminoAcidMatrix("blosum80");
	}

	/**
	 * Returns Blosum 85 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 85 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum85() {
		return getAminoAcidMatrix("blosum85");
	}

	/**
	 * Returns Blosum 90 matrix by Henikoff &amp; Henikoff
	 * @return Blosum 90 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getBlosum90() {
		return getAminoAcidMatrix("blosum90");
	}

	/**
	 * Returns Gonnet 250 matrix by Gonnet, Cohen &amp; Benner
	 * @return Gonnet 250 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getGonnet250() {
		return getAminoAcidMatrix("gonnet250");
	}

	/**
	 * Returns Nuc 4.2 matrix by Lowe
	 * Only the first nucleotide sequence to align can contain ambiguous nucleotides
	 * @return Nuc 4.2 matrix
	 */
	public static SubstitutionMatrix<NucleotideCompound> getNuc4_2() {
		return getNucleotideMatrix("nuc-4_2");
	}

	/**
	 * Returns Nuc 4.4 matrix by Lowe
	 * Both of the nucleotide sequences to align can contain ambiguous nucleotides
	 * @return Nuc 4.4 matrix
	 */
	public static SubstitutionMatrix<NucleotideCompound> getNuc4_4() {
		return getNucleotideMatrix("nuc-4_4");
	}

	/**
	 * Returns PAM 250 matrix by Dayhoff
	 * @return PAM 250 matrix
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
		return getAminoAcidMatrix("pam250");
	}

	// helper methods

	/**
	 * Returns a substitution matrix for {@link AminoAcidCompound amino acids} given by the name {@code name}.
	 * Searches first in the default AAINDEX file (see {@link #getMatrixFromAAINDEX(String)}), then in the classpath.
	 * If the required matrix does not exist, null is returned.
	 * Example names:
	 * <ul>
	 * <li>blosum62</li>
	 * <li>JOND920103</li>
	 * <li>pam250</li>
	 * <li>gonnet250</li>
	 * </ul>
	 * @param name Either a common name or an AAINDEX name
	 * @return the matching {@link SubstitutionMatrix}, or null if no classpath resource with that name exists
	 */
	public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatrix(String name) {
		SubstitutionMatrix<AminoAcidCompound> matrix = getMatrixFromAAINDEX(name);
		if (matrix != null) {
			return matrix;
		}
		return getAminoAcidMatrix(name);
	}

	// reads in an amino acid substitution matrix, if necessary;
	// returns null (and caches nothing) when no resource with that name exists
	private static SubstitutionMatrix<AminoAcidCompound> getAminoAcidMatrix(String file) {
		SubstitutionMatrix<AminoAcidCompound> matrix = aminoAcidMatrices.get(file);
		if (matrix == null) {
			InputStreamReader reader = getReader(file);
			if (reader == null) {
				return null;
			}
			matrix = new SimpleSubstitutionMatrix<AminoAcidCompound>(
					AminoAcidCompoundSet.getAminoAcidCompoundSet(), reader, file);
			aminoAcidMatrices.put(file, matrix);
		}
		return matrix;
	}

	// reads in a nucleotide substitution matrix, if necessary;
	// returns null (and caches nothing) when no resource with that name exists
	private static SubstitutionMatrix<NucleotideCompound> getNucleotideMatrix(String file) {
		SubstitutionMatrix<NucleotideCompound> matrix = nucleotideMatrices.get(file);
		if (matrix == null) {
			InputStreamReader reader = getReader(file);
			if (reader == null) {
				return null;
			}
			matrix = new SimpleSubstitutionMatrix<NucleotideCompound>(
					AmbiguityDNACompoundSet.getDNACompoundSet(), reader, file);
			nucleotideMatrices.put(file, matrix);
		}
		return matrix;
	}

	// opens a reader on the classpath resource "/matrices/<file>.txt";
	// returns null when the resource does not exist, because getResourceAsStream
	// yields null in that case and wrapping it would throw a NullPointerException
	private static InputStreamReader getReader(String file) {
		String resourcePathPrefix = "matrices/";
		InputStream stream = SubstitutionMatrixHelper.class.getResourceAsStream(
				String.format("/%s.txt", resourcePathPrefix + file));
		if (stream == null) {
			return null;
		}
		return new InputStreamReader(stream, MATRIX_CHARSET);
	}

}