001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022/*
023 *    AAindexStreamReader.java
024 */
025package org.biojava.bio.proteomics.aaindex;
026
027import java.io.BufferedReader;
028import java.io.IOException;
029import java.io.Reader;
030import java.util.Map;
031import java.util.NoSuchElementException;
032
033import org.biojava.bio.BioException;
034import org.biojava.bio.seq.io.SymbolTokenization;
035import org.biojava.bio.symbol.Symbol;
036import org.biojava.bio.symbol.SymbolPropertyTable;
037
038/**
039 * Iterator over {@link org.biojava.bio.proteomics.aaindex.AAindex} objects that 
040 * are stored in a stream in the AAindex1 file format. The format
041 * of such an Amino Acid Index Database file is described in the
042 * <a href="http://www.genome.ad.jp/dbget-bin/show_man?aaindex">AAindex manual
043 * </a>. The {@link #nextTable()} method returns objects of type
044 * {@link org.biojava.bio.proteomics.aaindex.AAindex}. See that class for
045 * further information. To hold an AAindex1 file in memory for random access
046 * use the {@link org.biojava.bio.proteomics.aaindex.SimpleSymbolPropertyTableDB}
047 * class:
048 * <pre>
049 * SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB(
050 *         new AAindexStreamReader(new FileReader("aaindex1")));
051 * AAindex hydrophobicity = (AAindex) db.table("CIDH920105");
052 * SymbolList symbols = ProteinTools.createProtein(
053 *     "ARNDCEQGHILKMFPSTWYV");
054 * double hp = 0.0;
055 * for (int i = 1; i <= symbols.length(); i++) {
056 *     hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i));
057 * }
058 * System.out.println("Average hydrophobicity: " + Double.toString(
059 *         hp / symbols.length()));
060 * </pre>
061 * <p><b>References:</b></p>
062 * <p><a href="http://www.genome.ad.jp/dbget/aaindex.html">AAindex web 
063 * site</a>.</p>
064 * <p>Kawashima, S. and Kanehisa, M.; AAindex: amino acid index database. 
065 * Nucleic Acids Res. 28, 374 (2000).</p>
066 * <p>Tomii, K. and Kanehisa, M.;  Analysis of amino acid indices and mutation 
067 * matrices for sequence comparison and structure prediction of proteins. 
068 * Protein Eng. 9, 27-36 (1996).</p>
069 * <p>Nakai, K., Kidera, A., and Kanehisa, M.;  Cluster analysis of amino acid 
070 * indices for prediction of protein structure and function.  
071 * Protein Eng. 2, 93-100 (1988)</p>
072 * @author <a href="mailto:Martin.Szugat@GMX.net">Martin Szugat</a>
073 * @version $Revision$
074 */
075public class AAindexStreamReader implements SymbolPropertyTableIterator {
076    
077//    public static final void main(String[] args) throws NullPointerException, 
078//    FileNotFoundException, BioException, IOException {
079//        SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB(
080//                new AAindexStreamReader(new FileReader("aaindex1")));
081//        AAindex hydrophobicity = (AAindex) db.table("CIDH920105");
082//        SymbolList symbols = ProteinTools.createProtein(
083//            "ARNDCEQGHILKMFPSTWYV");
084//        double hp = 0.0;
085//        for (int i = 1; i <= symbols.length(); i++) {
086//            hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i));
087//        }
088//        System.out.println("Average hydrophobicity: " + Double.toString(
089//                hp / symbols.length()));
090//    }
091//        
092    /* PRIVATE CONSTANTS */
093
094    /**
095     * Name of the tokenizer.
096     */
097    private static final String TOKENIZER = "token";
098
099    /* STATIC FIELDS */
100
101    /**
102     * List of amino acid symbols.
103     */
104    private static Symbol[] aa = null;
105
106    /* STATIC CONSTRUCTOR */
107
108    static {
109        try {
110            SymbolTokenization tokenizer = 
111                AAindex.PROTEIN_ALPHABET.getTokenization(TOKENIZER);
112            aa = new Symbol[] {tokenizer.parseToken("A"),
113                    tokenizer.parseToken("R"), tokenizer.parseToken("N"),
114                    tokenizer.parseToken("D"), tokenizer.parseToken("C"),
115                    tokenizer.parseToken("Q"), tokenizer.parseToken("E"),
116                    tokenizer.parseToken("G"), tokenizer.parseToken("H"),
117                    tokenizer.parseToken("I"), tokenizer.parseToken("L"),
118                    tokenizer.parseToken("K"), tokenizer.parseToken("M"),
119                    tokenizer.parseToken("F"), tokenizer.parseToken("P"),
120                    tokenizer.parseToken("S"), tokenizer.parseToken("T"),
121                    tokenizer.parseToken("W"), tokenizer.parseToken("Y"),
122                    tokenizer.parseToken("V"), };
123        } catch (BioException e) {
124            e.printStackTrace();
125        } catch (IndexOutOfBoundsException e) {
126            e.printStackTrace();
127        }
128    };
129    
130    /* PRIVATE FIELDS */
131
132    /**
133     * The internal reader.
134     */
135    private BufferedReader reader = null;
136
137    /**
138     * The current read line.
139     */
140    private String line = null;
141
142    /**
143     * The key char of the current read section. 
144     */
145    private char keyChar;
146
147    /**
148     * The value of the current read section.
149     */
150    private String stringValue;
151    
152    /* PUBLIC CONSTRUCTORS */
153
154    /**
155     * Initializes the iterator.
156     * @param reader reader over a stream in the AAindex file format.
157     * @throws IOException if the stream could not be read.
158     * @throws NullPointerException if <code>reader</code> is <code>null</code>.
159     */
160    public AAindexStreamReader(Reader reader) throws IOException, 
161    NullPointerException {
162        this(new BufferedReader(reader));
163    }
164
165    /**
166     * Initializes the iterator.
167     * @param reader buffered reader over a stream in the AAindex file format.
168     * @throws IOException if the stream could not be read.
169     * @throws NullPointerException if <code>reader</code> is <code>null</code>.
170     */
171    public AAindexStreamReader(BufferedReader reader) throws IOException,
172    NullPointerException {
173        if (reader == null) {
174            throw new NullPointerException("reader is null.");
175        }
176        this.reader = reader;
177        line = reader.readLine();
178    }
179    
180    /* PUBLIC METHODS */
181    
182    /**
183     * Checks if the end of the file or stream is reached.
184     * @return <code>true</code> if the end of the file is reached,
185     * <code>false</code> otherwise. 
186     */
187    public boolean eof() {
188        if (line == null) {
189            return true;
190        } else {
191            while (line != null && line.length() == 0) {
192                try {
193                    line = reader.readLine();
194                } catch (IOException e) {
195                    return true;
196                }
197            }
198            return (line == null);
199        }
200    }
201
202    /**
203     * Reads a AAindex section.
204     * @throws BioException if the section could not be read.
205     */
206    private void readSection() throws BioException {
207
208        keyChar = line.charAt(0);
209
210        StringBuffer stringBuffer = new StringBuffer();
211
212        do {
213            if (line.length() > 2) {
214                stringBuffer.append(line.substring(2));
215                if (!line.endsWith(" ")) {
216                    stringBuffer.append(" ");
217                }
218            }
219            try {
220                line = reader.readLine();
221            } catch (IOException e) {
222                throw new BioException(e);
223            }
224        } while (!eof() && line.charAt(0) == ' ');
225
226        stringValue = stringBuffer.toString();
227    }
228
229    /* INTERFACE SymbolPropertyTableIterator */
230
231    /**
232     * {@inheritDoc}
233     */
234    public boolean hasNext() {
235        return (!eof());
236    }
237
238    /**
239     * {@inheritDoc}
240     */
241    public SymbolPropertyTable nextTable() throws BioException {
242
243        if (eof()) {
244            throw new NoSuchElementException();
245        }
246
247        readSection();
248
249        if (keyChar != 'H') {
250            throw new BioException("Expected 'H' but found: '" + keyChar
251                    + "'.");
252        }
253        AAindex aaIndex = new AAindex(stringValue.trim());
254
255        readSections: while (!eof()) {
256
257            readSection();
258
259            switch (keyChar) {
260            case 'D':
261                aaIndex.setDescription(stringValue);
262                break;
263            case 'R':
264                aaIndex.setLITDBEntryNumbers(stringValue.split("\\s+"));
265                break;
266            case 'A':
267                aaIndex.setArticleAuthors(stringValue);
268                break;
269            case 'T':
270                aaIndex.setArticleTitle(stringValue);
271                break;
272            case 'J':
273                aaIndex.setJournalReference(stringValue);
274                break;
275            case 'C':
276                String[] keyValuePairs = stringValue.split("\\s+");
277                Map similarEntries = aaIndex.similarEntries();
278                for (int i = 0; i < keyValuePairs.length - 1; i += 2) {
279                    similarEntries.put(keyValuePairs[i], Double
280                            .valueOf(keyValuePairs[i + 1]));
281                }
282                break;
283            case 'I':
284                String[] headersAndIndices = stringValue.split("\\s+");
285                for (int i = 0; i < 20; i++) {
286                    try {
287                        aaIndex.setDoubleProperty(aa[i],
288                                headersAndIndices[11 + i]);
289                    } catch (NumberFormatException e) {
290                        aaIndex.setDoubleProperty(aa[i],
291                                        "NaN");
292                    }
293                }
294                break;
295            case '*':
296                aaIndex.setComment(stringValue);
297                break;
298            case '/':
299                break readSections;
300            default:
301                throw new BioException("Invalid key char found: " + keyChar
302                        + "'.");
303            }
304        }
305        
306        return aaIndex;
307    }
308}