001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.alignment.matrices;
022
023import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
024import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
026import org.biojava.nbio.core.sequence.template.CompoundSet;
027
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.StringTokenizer;
032
033
034/**
035 * The biojava-alignment module represents substitution matrices with short
036 * values. This is for performance reasons. Some substitution matrices, however,
037 * are provided as float values with up to 2 decimal places.
038 * <p>
039 * In order to be able to use them in the alignment module these are scaled in
040 * order to be able to represent as short values.
041 * The method {@link #getScale()} provides access to the scaling factor.
042 *
043 *
044 * @author Andreas Prlic
045 *
046 */
047public class ScaledSubstitutionMatrix implements
048                SubstitutionMatrix<AminoAcidCompound> {
049
050        private static final String comment = "#";
051
052        private String description, name;
053        private short[][] matrix;
054        private short max, min;
055        private AminoAcidCompoundSet compoundSet;
056
057        private List<AminoAcidCompound> rows, cols;
058
059        private int scale;
060
061        public ScaledSubstitutionMatrix(){
062                compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
063        }
064
065
066
067        public int getScale() {
068                return scale;
069        }
070
071        public void setScale(int scale) {
072                this.scale = scale;
073        }
074
075        @Override
076        public String getDescription() {
077                return description;
078        }
079        @Override
080        public void setDescription(String description) {
081                this.description = description;
082        }
083        @Override
084        public String getName() {
085                return name;
086        }
087        @Override
088        public void setName(String name) {
089                this.name = name;
090        }
091        @Override
092        public short[][] getMatrix() {
093                return matrix;
094        }
095        public void setMatrix(short[][] matrix) {
096                this.matrix = matrix;
097        }
098        public short getMax() {
099                return max;
100        }
101        public void setMax(short max) {
102                this.max = max;
103        }
104        public short getMin() {
105                return min;
106        }
107        public void setMin(short min) {
108                this.min = min;
109        }
110        public List<AminoAcidCompound> getRows() {
111                return rows;
112        }
113        public void setRows(List<AminoAcidCompound> rows) {
114                this.rows = rows;
115        }
116        public List<AminoAcidCompound> getCols() {
117                return cols;
118        }
119        public void setCols(List<AminoAcidCompound> cols) {
120                this.cols = cols;
121        }
122        public static String getComment() {
123                return comment;
124        }
125
126          /**
127         * Returns in a format similar to the standard NCBI files.
128         */
129        @Override
130        public String toString() {
131
132                String newline = System.getProperty("line.separator");
133                StringBuilder s = new StringBuilder();
134
135
136
137                StringTokenizer st = new StringTokenizer(description, newline);
138                while (st.hasMoreTokens()) {
139                        String line = st.nextToken();
140                        if (!line.startsWith(comment)) {
141                                s.append(comment);
142                        }
143                        s.append(String.format("%s%n", line));
144                }
145
146                if ( scale != 1)
147                        s.append("# Matrix scaled by a factor of ").append(scale).append(newline);
148                s.append(getMatrixAsString());
149                return s.toString();
150        }
151
152
153
154        @Override
155        public CompoundSet<AminoAcidCompound> getCompoundSet() {
156                return compoundSet;
157        }
158          @Override
159                public String getMatrixAsString() {
160                        StringBuilder s = new StringBuilder();
161
162
163
164                        int lengthCompound = compoundSet.getMaxSingleCompoundStringLength(), lengthRest =
165                                        Math.max(Math.max(Short.toString(min).length(), Short.toString(max).length()), lengthCompound) + 1;
166
167                        String padCompound = "%" + Integer.toString(lengthCompound) + "s",
168                                        padRest = "%" + Integer.toString(lengthRest);
169
170                        for (int i = 0; i < lengthCompound; i++) {
171                                s.append(" ");
172                        }
173                        for (AminoAcidCompound col : cols) {
174                                s.append(String.format(padRest + "s", compoundSet.getStringForCompound(col)));
175                        }
176                        s.append(String.format("%n"));
177                        for (AminoAcidCompound row : rows) {
178                                s.append(String.format(padCompound, compoundSet.getStringForCompound(row)));
179                                for (AminoAcidCompound col : cols) {
180                                        s.append(String.format(padRest + "d", getValue(row, col)));
181                                }
182                                s.append(String.format("%n"));
183                        }
184                        return s.toString();
185                }
186        @Override
187        public short getMaxValue() {
188                return max;
189        }
190        @Override
191        public short getMinValue() {
192                return min;
193        }
194        @Override
195        public short getValue(AminoAcidCompound from, AminoAcidCompound to) {
196                 int row = rows.indexOf(from), col = cols.indexOf(to);
197                        if (row == -1 || col == -1) {
198                                row = cols.indexOf(from);
199                                col = rows.indexOf(to);
200                                if (row == -1 || col == -1) {
201                                        return min;
202                                }
203                        }
204                        return matrix[row][col];
205
206
207        }
208
209
210        @Override
211        public SubstitutionMatrix<AminoAcidCompound> normalizeMatrix(short scale) {
212                return null;
213        }
214
215
216        @Override
217        public Map<AminoAcidCompound, Short> getRow(AminoAcidCompound row) {
218                int rowIndex = rows.indexOf(row);
219                Map<AminoAcidCompound, Short> map = new HashMap<>();
220                for (int colIndex = 0; colIndex < matrix[rowIndex].length; colIndex++) {
221                        map.put(cols.get(colIndex), matrix[rowIndex][colIndex]);
222                }
223                return map;
224        }
225
226        @Override
227        public Map<AminoAcidCompound, Short> getColumn(AminoAcidCompound column) {
228                int colIndex = cols.indexOf(column);
229                Map<AminoAcidCompound, Short> map = new HashMap<>();
230                for (int i = 0; i < matrix.length; i++) {
231                        map.put(rows.get(i), matrix[i][colIndex]);
232                }
233                return map;
234        }
235
236
237}