001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022/* 023 * AAindexStreamReader.java 024 */ 025package org.biojava.bio.proteomics.aaindex; 026 027import java.io.BufferedReader; 028import java.io.IOException; 029import java.io.Reader; 030import java.util.Map; 031import java.util.NoSuchElementException; 032 033import org.biojava.bio.BioException; 034import org.biojava.bio.seq.io.SymbolTokenization; 035import org.biojava.bio.symbol.Symbol; 036import org.biojava.bio.symbol.SymbolPropertyTable; 037 038/** 039 * Iterator over {@link org.biojava.bio.proteomics.aaindex.AAindex} objects that 040 * are stored in a stream in the AAindex1 file format. The format 041 * of such an Amino Acid Index Database file is described in the 042 * <a href="http://www.genome.ad.jp/dbget-bin/show_man?aaindex">AAindex manual 043 * </a>. The {@link #nextTable()} method returns objects of type 044 * {@link org.biojava.bio.proteomics.aaindex.AAindex}. See this class also for 045 * further informations. To hold an AAindex1 file in memory for random access 046 * use the {@link org.biojava.bio.proteomics.aaindex.SimpleSymbolPropertyTableDB} 047 * class: 048 * <pre> 049 * SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB( 050 * new AAindexStreamReader(new FileReader("aaindex1"))); 051 * AAindex hydrophobicity = (AAindex) db.table("CIDH920105"); 052 * SymbolList symbols = ProteinTools.createProtein( 053 * "ARNDCEQGHILKMFPSTWYV"); 054 * double hp = 0.0; 055 * for (int i = 1; i <= symbols.length(); i++) { 056 * hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i)); 057 * } 058 * System.out.println("Average hydrophobicity: " + Double.toString( 059 * hp / symbols.length())); 060 * </pre> 061 * <p><b>References:</b></p> 062 * <p><a href="http://www.genome.ad.jp/dbget/aaindex.html">AAindex web 063 * site</a>.</p> 064 * <p>Kawashima, S. and Kanehisa, M.; AAindex: amino acid index database. 065 * Nucleic Acids Res. 28, 374 (2000).</p> 066 * <p>Tomii, K. and Kanehisa, M.; Analysis of amino acid indices and mutation 067 * matrices for sequence comparison and structure prediction of proteins. 068 * Protein Eng. 9, 27-36 (1996).</p> 069 * <p>Nakai, K., Kidera, A., and Kanehisa, M.; Cluster analysis of amino acid 070 * indices for prediction of protein structure and function. 071 * Protein Eng. 2, 93-100 (1988)</p> 072 * @author <a href="mailto:Martin.Szugat@GMX.net">Martin Szugat</a> 073 * @version $Revision$ 074 */ 075public class AAindexStreamReader implements SymbolPropertyTableIterator { 076 077// public static final void main(String[] args) throws NullPointerException, 078// FileNotFoundException, BioException, IOException { 079// SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB( 080// new AAindexStreamReader(new FileReader("aaindex1"))); 081// AAindex hydrophobicity = (AAindex) db.table("CIDH920105"); 082// SymbolList symbols = ProteinTools.createProtein( 083// "ARNDCEQGHILKMFPSTWYV"); 084// double hp = 0.0; 085// for (int i = 1; i <= symbols.length(); i++) { 086// hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i)); 087// } 088// System.out.println("Average hydrophobicity: " + Double.toString( 089// hp / symbols.length())); 090// } 091// 092 /* PRIVATE CONSTANTS */ 093 094 /** 095 * Name of the tokenizer. 096 */ 097 private static final String TOKENIZER = "token"; 098 099 /* STATIC FIELDS */ 100 101 /** 102 * List of amino acid symbols. 103 */ 104 private static Symbol[] aa = null; 105 106 /* STATIC CONSTRUCTOR */ 107 108 static { 109 try { 110 SymbolTokenization tokenizer = 111 AAindex.PROTEIN_ALPHABET.getTokenization(TOKENIZER); 112 aa = new Symbol[] {tokenizer.parseToken("A"), 113 tokenizer.parseToken("R"), tokenizer.parseToken("N"), 114 tokenizer.parseToken("D"), tokenizer.parseToken("C"), 115 tokenizer.parseToken("Q"), tokenizer.parseToken("E"), 116 tokenizer.parseToken("G"), tokenizer.parseToken("H"), 117 tokenizer.parseToken("I"), tokenizer.parseToken("L"), 118 tokenizer.parseToken("K"), tokenizer.parseToken("M"), 119 tokenizer.parseToken("F"), tokenizer.parseToken("P"), 120 tokenizer.parseToken("S"), tokenizer.parseToken("T"), 121 tokenizer.parseToken("W"), tokenizer.parseToken("Y"), 122 tokenizer.parseToken("V"), }; 123 } catch (BioException e) { 124 e.printStackTrace(); 125 } catch (IndexOutOfBoundsException e) { 126 e.printStackTrace(); 127 } 128 }; 129 130 /* PRIVATE FIELDS */ 131 132 /** 133 * The internal reader. 134 */ 135 private BufferedReader reader = null; 136 137 /** 138 * The current read line. 139 */ 140 private String line = null; 141 142 /** 143 * The key char of the current read section. 144 */ 145 private char keyChar; 146 147 /** 148 * The value of the current read section. 149 */ 150 private String stringValue; 151 152 /* PUBLIC CONSTRUCTORS */ 153 154 /** 155 * Initializes the iterator. 156 * @param reader reader over a stream in the AAindex file format. 157 * @throws IOException if the stream could not be read. 158 * @throws NullPointerException if <code>reader</code> is <code>null</code>. 159 */ 160 public AAindexStreamReader(Reader reader) throws IOException, 161 NullPointerException { 162 this(new BufferedReader(reader)); 163 } 164 165 /** 166 * Initializes the iterator. 167 * @param reader buffered reader over a stream in the AAindex file format. 168 * @throws IOException if the stream could not be read. 169 * @throws NullPointerException if <code>reader</code> is <code>null</code>. 170 */ 171 public AAindexStreamReader(BufferedReader reader) throws IOException, 172 NullPointerException { 173 if (reader == null) { 174 throw new NullPointerException("reader is null."); 175 } 176 this.reader = reader; 177 line = reader.readLine(); 178 } 179 180 /* PUBLIC METHODS */ 181 182 /** 183 * Checks if the end of the file or stream is reached. 184 * @return <code>true</code> if the end of the file is reached, 185 * <code>false</code> otherwise. 186 */ 187 public boolean eof() { 188 if (line == null) { 189 return true; 190 } else { 191 while (line != null && line.length() == 0) { 192 try { 193 line = reader.readLine(); 194 } catch (IOException e) { 195 return true; 196 } 197 } 198 return (line == null); 199 } 200 } 201 202 /** 203 * Reads a AAindex section. 204 * @throws BioException if the section could not be read. 205 */ 206 private void readSection() throws BioException { 207 208 keyChar = line.charAt(0); 209 210 StringBuffer stringBuffer = new StringBuffer(); 211 212 do { 213 if (line.length() > 2) { 214 stringBuffer.append(line.substring(2)); 215 if (!line.endsWith(" ")) { 216 stringBuffer.append(" "); 217 } 218 } 219 try { 220 line = reader.readLine(); 221 } catch (IOException e) { 222 throw new BioException(e); 223 } 224 } while (!eof() && line.charAt(0) == ' '); 225 226 stringValue = stringBuffer.toString(); 227 } 228 229 /* INTERFACE SymbolPropertyTableIterator */ 230 231 /** 232 * {@inheritDoc} 233 */ 234 public boolean hasNext() { 235 return (!eof()); 236 } 237 238 /** 239 * {@inheritDoc} 240 */ 241 public SymbolPropertyTable nextTable() throws BioException { 242 243 if (eof()) { 244 throw new NoSuchElementException(); 245 } 246 247 readSection(); 248 249 if (keyChar != 'H') { 250 throw new BioException("Expected 'H' but found: '" + keyChar 251 + "'."); 252 } 253 AAindex aaIndex = new AAindex(stringValue.trim()); 254 255 readSections: while (!eof()) { 256 257 readSection(); 258 259 switch (keyChar) { 260 case 'D': 261 aaIndex.setDescription(stringValue); 262 break; 263 case 'R': 264 aaIndex.setLITDBEntryNumbers(stringValue.split("\\s+")); 265 break; 266 case 'A': 267 aaIndex.setArticleAuthors(stringValue); 268 break; 269 case 'T': 270 aaIndex.setArticleTitle(stringValue); 271 break; 272 case 'J': 273 aaIndex.setJournalReference(stringValue); 274 break; 275 case 'C': 276 String[] keyValuePairs = stringValue.split("\\s+"); 277 Map similarEntries = aaIndex.similarEntries(); 278 for (int i = 0; i < keyValuePairs.length - 1; i += 2) { 279 similarEntries.put(keyValuePairs[i], Double 280 .valueOf(keyValuePairs[i + 1])); 281 } 282 break; 283 case 'I': 284 String[] headersAndIndices = stringValue.split("\\s+"); 285 for (int i = 0; i < 20; i++) { 286 try { 287 aaIndex.setDoubleProperty(aa[i], 288 headersAndIndices[11 + i]); 289 } catch (NumberFormatException e) { 290 aaIndex.setDoubleProperty(aa[i], 291 "NaN"); 292 } 293 } 294 break; 295 case '*': 296 aaIndex.setComment(stringValue); 297 break; 298 case '/': 299 break readSections; 300 default: 301 throw new BioException("Invalid key char found: " + keyChar 302 + "'."); 303 } 304 } 305 306 return aaIndex; 307 } 308}