001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.alignment.matrices;
022
023import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
024import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
026
027import java.io.BufferedReader;
028import java.io.IOException;
029import java.io.InputStream;
030import java.io.InputStreamReader;
031import java.util.ArrayList;
032import java.util.HashMap;
033import java.util.List;
034import java.util.Map;
035
036
037
038public class AAIndexFileParser {
039        int scale = -1;
040
041        Map<String,SubstitutionMatrix<AminoAcidCompound>> matrices;
042
043        ScaledSubstitutionMatrix currentMatrix;
044        String currentRows;
045        String currentCols;
046        int currentRowPos;
047        private short[][] matrix;
048        short max;
049        short min;
050        private List<AminoAcidCompound> rows, cols;
051        boolean inMatrix;
052        boolean symmetricMatrix ;
053
054        public AAIndexFileParser(){
055                matrices  = new HashMap<>();
056        }
057
058        /** Parse an inputStream that points to an AAINDEX database file
059         *
060         * @param inputStream
061         * @throws IOException
062         */
063        public void parse(InputStream inputStream) throws IOException {
064
065                currentMatrix = null;
066                currentRows = "";
067                currentCols = "";
068                max = Short.MIN_VALUE;
069                min = Short.MAX_VALUE;
070                inMatrix = false;
071
072                BufferedReader buf = new BufferedReader (new InputStreamReader (inputStream));
073                String line = null;
074                line = buf.readLine();
075
076                while (  line != null ) {
077                        if ( line.startsWith("//")) {
078                                finalizeMatrix();
079                                inMatrix = false;
080
081                        } else if ( line.startsWith("H ")){
082                                // a new matrix!
083                                newMatrix(line);
084                        } else if ( line.startsWith("D ")) {
085                                currentMatrix.setDescription(line.substring(2));
086                        } else if ( line.startsWith("M ")) {
087                                initMatrix(line);
088                                inMatrix = true;
089                        } else if ( line.startsWith("  ")){
090                                if ( inMatrix)
091                                        processScores(line);
092                        }
093                        line = buf.readLine();
094                }
095        }
096
097        //  process a line such as >    -0.3     1.6     0.7     0.8    -2.6     3.0<
098        private void processScores(String line) {
099                String[] values = line.trim().split(" +");
100
101                // increment the current row we are talking about
102                currentRowPos++;
103
104                for ( int i =0 ; i < values.length ; i++){
105                        if ( values[i].endsWith(".")) {
106                                values[i] = values[i] + "0";
107                        }
108
109                        // special case: MEHP950101
110                        if ("-".equals(values[i])) {
111                                values[i] = "0";
112                        }
113                        if ( scale == -1 ) {
114                                scale = determineScale(values[0]);
115                        }
116
117                        Float score = Float.parseFloat(values[i]);
118                        score = scale * score;
119
120                        Short s = (short) Math.round(score);
121                        matrix[currentRowPos][i] = s;
122
123                        if ( values.length < cols.size() || ( symmetricMatrix)){
124                                //System.out.println(values.length + " " + cols.size() + " " + currentRowPos + " " + i + " " +  line);
125                                matrix[i][currentRowPos] = s;
126                                symmetricMatrix = true;
127                        }
128                        if ( score > max)
129                                max = s;
130                        if ( score < min)
131                                min = s;
132                }
133        }
134
135        private int determineScale(String value) {
136                String[] spl = value.split("\\.");
137                if (spl.length <= 1)
138                        return 1;
139                String digits = spl[1];
140                return (int)Math.round(Math.pow(10, digits.length()));
141        }
142
143        // process a line of type >M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV<
144        private void initMatrix(String line) {
145                String[] spl = line.split(" ");
146
147                // trim off the final , character
148                currentRows = spl[3].substring(0, spl[3].length()-1);
149                currentCols = spl[6];
150                currentRowPos = -1;
151
152                int nrRows = currentRows.length();
153                int nrCols = currentCols.length();
154
155                matrix = new short[nrRows][nrCols];
156
157                rows = new ArrayList<>();
158                cols = new ArrayList<>();
159
160                //System.out.println(">" + currentRows+"<");
161                AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
162                for ( int i = 0 ; i < currentRows.length() ; i ++){
163                        char c = currentRows.charAt(i);
164                        AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
165                        rows.add(aa);
166                }
167
168                for ( int i = 0 ; i < currentCols.length() ; i ++){
169                        char c = currentRows.charAt(i);
170                        AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
171                        cols.add(aa);
172                }
173                currentMatrix.setScale(scale);
174        }
175
176
177        private void newMatrix(String line) {
178                symmetricMatrix = false;
179                scale = -1;
180
181                currentMatrix = new ScaledSubstitutionMatrix();
182                currentMatrix.setName(line.substring(2));
183
184
185                //System.out.println("new Matrix " + currentMatrix.getName());
186        }
187
188        //
189        private SubstitutionMatrix<AminoAcidCompound> finalizeMatrix() {
190
191                currentMatrix.setMatrix(matrix);
192                currentMatrix.setMax(max);
193                currentMatrix.setMin(min);
194                currentMatrix.setCols(cols);
195                currentMatrix.setRows(rows);
196                currentMatrix.setScale(scale);
197                matrices.put(currentMatrix.getName(), currentMatrix);
198
199                return currentMatrix;
200
201        }
202
203        public Map<String, SubstitutionMatrix<AminoAcidCompound>> getMatrices() {
204                return matrices;
205        }
206}