/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on July 26, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.core.alignment.matrices;

import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

/**
 * Static utility to access substitution matrices that come bundled with BioJava.  All matrices were downloaded from
 * ftp://ftp.ncbi.nih.gov/blast/matrices/
 *
 * <p>Matrices read from bundled resource files are cached in static maps after the first successful load, so
 * repeated requests for the same name return the same instance. The caches are plain {@link HashMap}s and are
 * not synchronized.
 *
 * @author Mark Chapman
 * @author Paolo Pavan
 */
public class SubstitutionMatrixHelper implements Serializable {

    private static final long serialVersionUID = 148491724604653225L;

    /** Classpath directory (relative to the classpath root) that holds the bundled matrix files. */
    private static final String RESOURCE_PATH_PREFIX = "matrices/";

    /** Cache of amino acid matrices already loaded from resources, keyed by resource name. */
    private static final Map<String, SubstitutionMatrix<AminoAcidCompound>> aminoAcidMatrices =
            new HashMap<>();

    /** Cache of nucleotide matrices already loaded from resources, keyed by resource name. */
    private static final Map<String, SubstitutionMatrix<NucleotideCompound>> nucleotideMatrices =
            new HashMap<>();

    // prevents instantiation
    private SubstitutionMatrixHelper() { }

    /**
     * Returns any matrix from the AAINDEX database file.
     *
     * @param matrixName the AAINDEX name of the matrix to look up
     * @return a {@link SubstitutionMatrix}, as returned by the AAINDEX provider for {@code matrixName}
     */
    public static SubstitutionMatrix<AminoAcidCompound> getMatrixFromAAINDEX(String matrixName) {
        return AAindexFactory.getAAIndexProvider().getMatrix(matrixName);
    }

    /**
     * Gets identity matrix where matches score 1 and mismatches score -10000.
     *
     * @return the identity matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getIdentity() {
        return getAminoAcidMatrix("identity");
    }

    /**
     * Returns Blosum 100 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 100 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum100() {
        return getAminoAcidMatrix("blosum100");
    }

    /**
     * Returns Blosum 30 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 30 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum30() {
        return getAminoAcidMatrix("blosum30");
    }

    /**
     * Returns Blosum 35 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 35 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum35() {
        return getAminoAcidMatrix("blosum35");
    }

    /**
     * Returns Blosum 40 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 40 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum40() {
        return getAminoAcidMatrix("blosum40");
    }

    /**
     * Returns Blosum 45 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 45 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum45() {
        return getAminoAcidMatrix("blosum45");
    }

    /**
     * Returns Blosum 50 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 50 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum50() {
        return getAminoAcidMatrix("blosum50");
    }

    /**
     * Returns Blosum 55 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 55 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum55() {
        return getAminoAcidMatrix("blosum55");
    }

    /**
     * Returns Blosum 60 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 60 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum60() {
        return getAminoAcidMatrix("blosum60");
    }

    /**
     * Returns Blosum 62 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 62 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum62() {
        return getAminoAcidMatrix("blosum62");
    }

    /**
     * Returns Blosum 65 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 65 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum65() {
        return getAminoAcidMatrix("blosum65");
    }

    /**
     * Returns Blosum 70 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 70 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum70() {
        return getAminoAcidMatrix("blosum70");
    }

    /**
     * Returns Blosum 75 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 75 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum75() {
        return getAminoAcidMatrix("blosum75");
    }

    /**
     * Returns Blosum 80 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 80 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum80() {
        return getAminoAcidMatrix("blosum80");
    }

    /**
     * Returns Blosum 85 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 85 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum85() {
        return getAminoAcidMatrix("blosum85");
    }

    /**
     * Returns Blosum 90 matrix by Henikoff &amp; Henikoff.
     *
     * @return Blosum 90 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getBlosum90() {
        return getAminoAcidMatrix("blosum90");
    }

    /**
     * Returns PAM 250 matrix by Gonnet, Cohen &amp; Benner.
     *
     * @return Gonnet 250 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getGonnet250() {
        return getAminoAcidMatrix("gonnet250");
    }

    /**
     * Returns Nuc 4.2 matrix by Lowe.
     * Only the first nucleotide sequence to align can contain ambiguous nucleotides.
     *
     * @return Nuc 4.2 matrix
     */
    public static SubstitutionMatrix<NucleotideCompound> getNuc4_2() {
        return getNucleotideMatrix("nuc-4_2");
    }

    /**
     * Returns Nuc 4.4 matrix by Lowe.
     * Both of the nucleotide sequences to align can contain ambiguous nucleotides.
     *
     * @return Nuc 4.4 matrix
     */
    public static SubstitutionMatrix<NucleotideCompound> getNuc4_4() {
        return getNucleotideMatrix("nuc-4_4");
    }

    /**
     * Returns PAM 250 matrix by Dayhoff.
     *
     * @return PAM 250 matrix
     */
    public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
        return getAminoAcidMatrix("pam250");
    }

    // helper methods

    /**
     * Returns a substitution matrix for {@link AminoAcidCompound amino acids} given by the name {@code name}.
     * Searches first in the default AAINDEX file (see {@link #getMatrixFromAAINDEX(String)}), then in the classpath
     * in src/main/resources/matrices.
     * Example names:
     * <ul>
     * <li>blosum62</li>
     * <li>JOND920103</li>
     * <li>pam250</li>
     * <li>gonnet250</li>
     * </ul>
     *
     * @param name Either a common name or an AAINDEX name
     * @return a {@code SubstitutionMatrix} or {@code null} if no matrix is found
     */
    public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatrix(String name) {
        SubstitutionMatrix<AminoAcidCompound> matrix = getMatrixFromAAINDEX(name);
        if (matrix != null) {
            return matrix;
        }
        // Not an AAINDEX entry: fall back to the matrices bundled as classpath resources.
        return getAminoAcidMatrix(name);
    }

    /**
     * Reads in an amino acid substitution matrix, if necessary, and caches it under its resource name.
     *
     * @param file resource name without directory or {@code .txt} extension
     * @return the cached or newly loaded matrix, or {@code null} if the resource does not exist
     */
    private static SubstitutionMatrix<AminoAcidCompound> getAminoAcidMatrix(String file) {
        if (!aminoAcidMatrices.containsKey(file)) {
            InputStreamReader reader = getReader(file);
            if (reader == null) {
                // Missing resources are not cached, so a later call looks the resource up again.
                return null;
            }
            aminoAcidMatrices.put(file, new SimpleSubstitutionMatrix<AminoAcidCompound>(
                    AminoAcidCompoundSet.getAminoAcidCompoundSet(), reader, file));
        }
        return aminoAcidMatrices.get(file);
    }

    /**
     * Reads in a nucleotide substitution matrix, if necessary, and caches it under its resource name.
     *
     * @param file resource name without directory or {@code .txt} extension
     * @return the cached or newly loaded matrix, or {@code null} if the resource does not exist
     */
    private static SubstitutionMatrix<NucleotideCompound> getNucleotideMatrix(String file) {
        if (!nucleotideMatrices.containsKey(file)) {
            InputStreamReader reader = getReader(file);
            if (reader == null) {
                // Mirror getAminoAcidMatrix: report a missing resource as null rather than
                // handing a null reader to the SimpleSubstitutionMatrix constructor.
                return null;
            }
            nucleotideMatrices.put(file, new SimpleSubstitutionMatrix<NucleotideCompound>(
                    AmbiguityDNACompoundSet.getDNACompoundSet(), reader, file));
        }
        return nucleotideMatrices.get(file);
    }

    /**
     * Opens a reader on the bundled matrix resource {@code /matrices/<file>.txt}.
     *
     * @param file resource name without directory or {@code .txt} extension
     * @return a reader on the resource, or {@code null} if no such resource is found
     */
    private static InputStreamReader getReader(String file) {
        String resourcePath = "/" + RESOURCE_PATH_PREFIX + file + ".txt";
        InputStream is = SubstitutionMatrixHelper.class.getResourceAsStream(resourcePath);
        if (is == null) {
            return null;
        }
        // Decode with a fixed charset so parsing does not depend on the platform default.
        // NOTE(review): assumes the bundled matrix files are ASCII/UTF-8 text — confirm.
        return new InputStreamReader(is, StandardCharsets.UTF_8);
    }

}