001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.alignment.matrices; 022 023import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 024import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 025import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 026import org.biojava.nbio.core.sequence.template.CompoundSet; 027 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.StringTokenizer; 032 033 034/** 035 * The biojava-alignment module represents substitution matrices with short 036 * values. This is for performance reasons. Some substitution matrices, however, 037 * are provided as float values with up to 2 decimal places. 038 * <p> 039 * In order to be able to use them in the alignment module these are scaled in 040 * order to be able to represent as short values. 041 * The method {@link #getScale()} provides access to the scaling factor. 042 * 043 * 044 * @author Andreas Prlic 045 * 046 */ 047public class ScaledSubstitutionMatrix implements 048 SubstitutionMatrix<AminoAcidCompound> { 049 050 private static final String comment = "#"; 051 052 private String description, name; 053 private short[][] matrix; 054 private short max, min; 055 private AminoAcidCompoundSet compoundSet; 056 057 private List<AminoAcidCompound> rows, cols; 058 059 private int scale; 060 061 public ScaledSubstitutionMatrix(){ 062 compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 063 } 064 065 066 067 public int getScale() { 068 return scale; 069 } 070 071 public void setScale(int scale) { 072 this.scale = scale; 073 } 074 075 @Override 076 public String getDescription() { 077 return description; 078 } 079 @Override 080 public void setDescription(String description) { 081 this.description = description; 082 } 083 @Override 084 public String getName() { 085 return name; 086 } 087 @Override 088 public void setName(String name) { 089 this.name = name; 090 } 091 @Override 092 public short[][] getMatrix() { 093 return matrix; 094 } 095 public void setMatrix(short[][] matrix) { 096 this.matrix = matrix; 097 } 098 public short getMax() { 099 return max; 100 } 101 public void setMax(short max) { 102 this.max = max; 103 } 104 public short getMin() { 105 return min; 106 } 107 public void setMin(short min) { 108 this.min = min; 109 } 110 public List<AminoAcidCompound> getRows() { 111 return rows; 112 } 113 public void setRows(List<AminoAcidCompound> rows) { 114 this.rows = rows; 115 } 116 public List<AminoAcidCompound> getCols() { 117 return cols; 118 } 119 public void setCols(List<AminoAcidCompound> cols) { 120 this.cols = cols; 121 } 122 public static String getComment() { 123 return comment; 124 } 125 126 /** 127 * Returns in a format similar to the standard NCBI files. 128 */ 129 @Override 130 public String toString() { 131 132 String newline = System.getProperty("line.separator"); 133 StringBuilder s = new StringBuilder(); 134 135 136 137 StringTokenizer st = new StringTokenizer(description, newline); 138 while (st.hasMoreTokens()) { 139 String line = st.nextToken(); 140 if (!line.startsWith(comment)) { 141 s.append(comment); 142 } 143 s.append(String.format("%s%n", line)); 144 } 145 146 if ( scale != 1) 147 s.append("# Matrix scaled by a factor of ").append(scale).append(newline); 148 s.append(getMatrixAsString()); 149 return s.toString(); 150 } 151 152 153 154 @Override 155 public CompoundSet<AminoAcidCompound> getCompoundSet() { 156 return compoundSet; 157 } 158 @Override 159 public String getMatrixAsString() { 160 StringBuilder s = new StringBuilder(); 161 162 163 164 int lengthCompound = compoundSet.getMaxSingleCompoundStringLength(), lengthRest = 165 Math.max(Math.max(Short.toString(min).length(), Short.toString(max).length()), lengthCompound) + 1; 166 167 String padCompound = "%" + Integer.toString(lengthCompound) + "s", 168 padRest = "%" + Integer.toString(lengthRest); 169 170 for (int i = 0; i < lengthCompound; i++) { 171 s.append(" "); 172 } 173 for (AminoAcidCompound col : cols) { 174 s.append(String.format(padRest + "s", compoundSet.getStringForCompound(col))); 175 } 176 s.append(String.format("%n")); 177 for (AminoAcidCompound row : rows) { 178 s.append(String.format(padCompound, compoundSet.getStringForCompound(row))); 179 for (AminoAcidCompound col : cols) { 180 s.append(String.format(padRest + "d", getValue(row, col))); 181 } 182 s.append(String.format("%n")); 183 } 184 return s.toString(); 185 } 186 @Override 187 public short getMaxValue() { 188 return max; 189 } 190 @Override 191 public short getMinValue() { 192 return min; 193 } 194 @Override 195 public short getValue(AminoAcidCompound from, AminoAcidCompound to) { 196 int row = rows.indexOf(from), col = cols.indexOf(to); 197 if (row == -1 || col == -1) { 198 row = cols.indexOf(from); 199 col = rows.indexOf(to); 200 if (row == -1 || col == -1) { 201 return min; 202 } 203 } 204 return matrix[row][col]; 205 206 207 } 208 209 210 @Override 211 public SubstitutionMatrix<AminoAcidCompound> normalizeMatrix(short scale) { 212 return null; 213 } 214 215 216 @Override 217 public Map<AminoAcidCompound, Short> getRow(AminoAcidCompound row) { 218 int rowIndex = rows.indexOf(row); 219 Map<AminoAcidCompound, Short> map = new HashMap<>(); 220 for (int colIndex = 0; colIndex < matrix[rowIndex].length; colIndex++) { 221 map.put(cols.get(colIndex), matrix[rowIndex][colIndex]); 222 } 223 return map; 224 } 225 226 @Override 227 public Map<AminoAcidCompound, Short> getColumn(AminoAcidCompound column) { 228 int colIndex = cols.indexOf(column); 229 Map<AminoAcidCompound, Short> map = new HashMap<>(); 230 for (int i = 0; i < matrix.length; i++) { 231 map.put(rows.get(i), matrix[i][colIndex]); 232 } 233 return map; 234 } 235 236 237}