/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.core.alignment.matrices;

import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Line-oriented parser for AAINDEX database files.
 *
 * <p>The parser reacts to the following line prefixes:
 * <ul>
 *   <li>{@code "H "} - starts a new entry; the rest of the line is the matrix name</li>
 *   <li>{@code "D "} - the rest of the line is the matrix description</li>
 *   <li>{@code "M "} - declares the row and column symbols and starts the score section</li>
 *   <li>{@code " "} - a row of scores (only while inside a score section)</li>
 *   <li>{@code "//"} - ends the entry and stores the matrix under its name</li>
 * </ul>
 * All other lines are ignored.
 *
 * <p>Parsing state is kept in instance fields, so a single instance must not be
 * used to parse several streams concurrently.
 */
public class AAIndexFileParser {

    /** Factor applied to every score before rounding to {@code short}; -1 means "not yet determined". */
    int scale = -1;

    /** All matrices parsed so far, keyed by matrix name. */
    Map<String, SubstitutionMatrix<AminoAcidCompound>> matrices;

    ScaledSubstitutionMatrix currentMatrix;
    String currentRows;
    String currentCols;
    /** Index of the score row most recently written; -1 before the first row. */
    int currentRowPos;
    private short[][] matrix;
    short max;
    short min;
    private List<AminoAcidCompound> rows, cols;
    boolean inMatrix;
    /** Set once a score row shorter than the column count is seen (triangular input). */
    boolean symmetricMatrix;

    public AAIndexFileParser() {
        matrices = new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
    }

    /**
     * Parses an input stream that points to an AAINDEX database file. Every
     * completed entry is added to the map returned by {@link #getMatrices()}.
     *
     * <p>The stream is read to its end but is not closed; closing it remains the
     * responsibility of the caller.
     *
     * @param inputStream stream providing the AAINDEX file content
     * @throws IOException if reading from the stream fails
     */
    public void parse(InputStream inputStream) throws IOException {

        currentMatrix = null;
        currentRows = "";
        currentCols = "";
        max = Short.MIN_VALUE;
        min = Short.MAX_VALUE;
        inMatrix = false;

        // Deliberately not closed here: the underlying stream belongs to the caller.
        BufferedReader buf = new BufferedReader(new InputStreamReader(inputStream));

        String line;
        while ((line = buf.readLine()) != null) {
            if (line.startsWith("//")) {
                finalizeMatrix();
                inMatrix = false;
            } else if (line.startsWith("H ")) {
                // a new matrix!
                newMatrix(line);
            } else if (line.startsWith("D ")) {
                // A description without a preceding "H " line has nothing to attach to.
                if (currentMatrix != null) {
                    currentMatrix.setDescription(line.substring(2));
                }
            } else if (line.startsWith("M ")) {
                initMatrix(line);
                inMatrix = true;
            } else if (line.startsWith(" ")) {
                if (inMatrix) {
                    processScores(line);
                }
            }
        }
    }

    /**
     * Processes one row of scores, e.g. {@code "   -0.3     1.6     0.7     0.8    -2.6     3.0"}.
     *
     * <p>Each value is multiplied by {@link #scale}, rounded and stored in the next
     * matrix row. When a row holds fewer values than there are columns, the input is
     * treated as triangular and, from then on, every value is mirrored across the
     * diagonal.
     *
     * @param line a score line of the current matrix section
     */
    private void processScores(String line) {

        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            // A whitespace-only line carries no scores and must not consume a row.
            return;
        }

        String[] values = trimmed.split("\\s+");

        // increment the current row we are talking about
        currentRowPos++;

        for (int i = 0; i < values.length; i++) {

            String value = values[i];

            // "3." is written without fractional digits; make it "3.0"
            if (value.endsWith(".")) {
                value = value + "0";
            }

            // special case: MEHP950101 uses a bare "-" for a missing score
            if (value.equals("-")) {
                value = "0";
            }

            // The scale is derived once per entry, from the first value of the first row.
            if (scale == -1) {
                scale = determineScale(value);
            }

            float score = scale * Float.parseFloat(value);
            short s = (short) Math.round(score);

            matrix[currentRowPos][i] = s;

            if (values.length < cols.size() || symmetricMatrix) {
                matrix[i][currentRowPos] = s;
                symmetricMatrix = true;
            }

            if (score > max) {
                max = s;
            }
            if (score < min) {
                min = s;
            }
        }
    }

    /**
     * Derives the scaling factor from the number of digits after the decimal point.
     *
     * @param value textual score, e.g. {@code "-0.3"}
     * @return 10 to the power of the number of fractional digits, or 1 if the value
     *         has no fractional part
     */
    private int determineScale(String value) {

        String[] spl = value.split("\\.");

        if (spl.length <= 1) {
            return 1;
        }

        String digits = spl[1];

        return (int) Math.round(Math.pow(10, digits.length()));
    }

    /**
     * Processes a line of type
     * {@code "M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV"}:
     * allocates the score matrix and resolves the row and column compounds.
     *
     * @param line the "M " line of the current entry
     */
    private void initMatrix(String line) {
        String[] spl = line.split(" ");

        // trim off the final , character
        currentRows = spl[3].substring(0, spl[3].length() - 1);
        currentCols = spl[6];
        currentRowPos = -1;

        matrix = new short[currentRows.length()][currentCols.length()];

        rows = toCompounds(currentRows);
        cols = toCompounds(currentCols);

        if (currentMatrix != null) {
            currentMatrix.setScale(scale);
        }
    }

    /** Maps every character of {@code symbols} to its amino acid compound, preserving order. */
    private List<AminoAcidCompound> toCompounds(String symbols) {
        AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
        List<AminoAcidCompound> compounds = new ArrayList<AminoAcidCompound>(symbols.length());
        for (int i = 0; i < symbols.length(); i++) {
            compounds.add(compoundSet.getCompoundForString(String.valueOf(symbols.charAt(i))));
        }
        return compounds;
    }

    /**
     * Starts a new entry from an "H " line and resets all per-entry state.
     *
     * @param line the "H " line; everything after the prefix becomes the matrix name
     */
    private void newMatrix(String line) {
        symmetricMatrix = false;
        scale = -1;

        // Extremes are per matrix; without this reset they would leak from earlier entries.
        max = Short.MIN_VALUE;
        min = Short.MAX_VALUE;

        currentMatrix = new ScaledSubstitutionMatrix();
        currentMatrix.setName(line.substring(2));
    }

    /**
     * Copies the collected state into the current matrix and registers it by name.
     *
     * @return the finalized matrix, or {@code null} if no entry has been started
     */
    private SubstitutionMatrix<AminoAcidCompound> finalizeMatrix() {

        if (currentMatrix == null) {
            // An end-of-entry marker before any "H " line: nothing to store.
            return null;
        }

        currentMatrix.setMatrix(matrix);
        currentMatrix.setMax(max);
        currentMatrix.setMin(min);
        currentMatrix.setCols(cols);
        currentMatrix.setRows(rows);
        currentMatrix.setScale(scale);
        matrices.put(currentMatrix.getName(), currentMatrix);

        return currentMatrix;
    }

    /**
     * Returns the matrices collected by all {@link #parse(InputStream)} calls on this instance.
     *
     * @return the internal map from matrix name to matrix
     */
    public Map<String, SubstitutionMatrix<AminoAcidCompound>> getMatrices() {
        return matrices;
    }
}