001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.alignment.matrices;
022
023import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
024import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
026
027import java.io.BufferedReader;
028import java.io.IOException;
029import java.io.InputStream;
030import java.io.InputStreamReader;
031import java.util.ArrayList;
032import java.util.HashMap;
033import java.util.List;
034import java.util.Map;
035
036
037
038public class AAIndexFileParser {
039        int scale = -1;
040
041        Map<String,SubstitutionMatrix<AminoAcidCompound>> matrices;
042
043        ScaledSubstitutionMatrix currentMatrix;
044        String currentRows;
045        String currentCols;
046        int currentRowPos;
047        private short[][] matrix;
048        short max;
049        short min;
050        private List<AminoAcidCompound> rows, cols;
051        boolean inMatrix;
052        boolean symmetricMatrix ;
053
054
055        public AAIndexFileParser(){
056                matrices  = new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
057        }
058
059        /** parse an inputStream that points to an AAINDEX database file
060         *
061         * @param inputStream
062         * @throws IOException
063         */
064        public void parse(InputStream inputStream) throws IOException {
065
066                currentMatrix = null;
067                currentRows = "";
068                currentCols = "";
069                max = Short.MIN_VALUE;
070                min = Short.MAX_VALUE;
071                inMatrix = false;
072
073                BufferedReader buf = new BufferedReader (new InputStreamReader (inputStream));
074                String line = null;
075                line = buf.readLine();
076
077                while (  line != null ) {
078
079                        if ( line.startsWith("//")) {
080                                finalizeMatrix();
081                                inMatrix = false;
082
083                        } else if ( line.startsWith("H ")){
084                                // a new matric!
085                                newMatrix(line);
086                        } else if ( line.startsWith("D ")) {
087                                currentMatrix.setDescription(line.substring(2));
088                        } else if ( line.startsWith("M ")) {
089                                initMatrix(line);
090                                inMatrix = true;
091                        } else if ( line.startsWith("  ")){
092                                if ( inMatrix)
093                                        processScores(line);
094                        }
095
096                        line = buf.readLine();
097                }
098
099        }
100
101
102        //  process a line such as >    -0.3     1.6     0.7     0.8    -2.6     3.0<
103        private void processScores(String line) {
104
105                String[] values = line.trim().split(" +");
106
107                // increment the current row we are talking about
108                currentRowPos++;
109
110
111
112                for ( int i =0 ; i < values.length ; i++){
113
114                        if ( values[i].endsWith(".")) {
115                                values[i] = values[i] + "0";
116                        }
117
118                        // special case: MEHP950101
119                        if (values[i].equals("-")) {
120                                values[i] = "0";
121                        }
122
123                        if ( scale == -1 ) {
124                                scale = determineScale(values[0]);
125                        }
126
127
128                        Float score = Float.parseFloat(values[i]);
129                        score = scale * score;
130
131                        Short s = (short) Math.round(score);
132
133                        matrix[currentRowPos][i] = s;
134
135                        if ( values.length < cols.size() || ( symmetricMatrix)){
136                                //System.out.println(values.length + " " + cols.size() + " " + currentRowPos + " " + i + " " +  line);
137
138                                matrix[i][currentRowPos] = s;
139
140                                symmetricMatrix = true;
141
142                        }
143
144                        if ( score > max)
145                                max = s;
146                        if ( score < min)
147                                min = s;
148
149
150                }
151        }
152
153        private int determineScale(String value) {
154
155                String[] spl = value.split("\\.");
156
157                if (spl.length <= 1)
158                        return 1;
159
160                String digits = spl[1];
161
162                return (int)Math.round(Math.pow(10, digits.length()));
163
164        }
165
166        // process a line of type >M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV<
167        private void initMatrix(String line) {
168                String[] spl = line.split(" ");
169
170                // trim off the final , character
171                currentRows = spl[3].substring(0, spl[3].length()-1);
172                currentCols = spl[6];
173                currentRowPos = -1;
174
175                int nrRows = currentRows.length();
176                int nrCols = currentCols.length();
177
178                matrix = new short[nrRows][nrCols];
179
180                rows = new ArrayList<AminoAcidCompound>();
181                cols = new ArrayList<AminoAcidCompound>();
182
183
184                //System.out.println(">" + currentRows+"<");
185                AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
186                for ( int i = 0 ; i < currentRows.length() ; i ++){
187                        char c = currentRows.charAt(i);
188                        AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
189
190                        rows.add(aa);
191                }
192
193                for ( int i = 0 ; i < currentCols.length() ; i ++){
194                        char c = currentRows.charAt(i);
195                        AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
196
197                        cols.add(aa);
198                }
199
200
201
202
203
204                currentMatrix.setScale(scale);
205        }
206
207
208        private void newMatrix(String line) {
209                symmetricMatrix = false;
210                scale = -1;
211
212                currentMatrix = new ScaledSubstitutionMatrix();
213                currentMatrix.setName(line.substring(2));
214
215
216                //System.out.println("new Matrix " + currentMatrix.getName());
217        }
218
219        //
220        private SubstitutionMatrix<AminoAcidCompound> finalizeMatrix() {
221
222                currentMatrix.setMatrix(matrix);
223                currentMatrix.setMax(max);
224                currentMatrix.setMin(min);
225                currentMatrix.setCols(cols);
226                currentMatrix.setRows(rows);
227                currentMatrix.setScale(scale);
228                matrices.put(currentMatrix.getName(), currentMatrix);
229
230                return currentMatrix;
231
232        }
233
234        public Map<String, SubstitutionMatrix<AminoAcidCompound>> getMatrices() {
235                return matrices;
236        }
237}