001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.program;
024
025import java.io.BufferedReader;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.InputStreamReader;
029import java.io.StreamTokenizer;
030import java.util.ArrayList;
031import java.util.List;
032
033import org.biojava.bio.BioError;
034import org.biojava.bio.dist.DistributionFactory;
035import org.biojava.bio.dp.SimpleWeightMatrix;
036import org.biojava.bio.seq.io.SymbolTokenization;
037import org.biojava.bio.symbol.FiniteAlphabet;
038import org.biojava.bio.symbol.IllegalAlphabetException;
039import org.biojava.bio.symbol.IllegalSymbolException;
040import org.biojava.bio.symbol.SimpleSymbolList;
041import org.biojava.bio.symbol.SymbolList;
042import org.biojava.utils.ChangeVetoException;
043
044/**
045 * The results of a meme run.
046 *
047 * @author Matthew Pocock
048 */
049public class Meme {
050  private List motifs;
051  private List seqIDs;
052
053  {
054    motifs = new ArrayList();
055    seqIDs = new ArrayList();
056  }
057
058  public List getMotifs() {
059    return motifs;
060  }
061
062  public List getSeqIDs() {
063    return seqIDs;
064  }
065
066  public Meme(InputStream is, SymbolTokenization symParser)
067         throws IOException, IllegalSymbolException, IllegalAlphabetException {
068    StreamTokenizer st = new StreamTokenizer(
069      new BufferedReader(new InputStreamReader(is)));
070    st.eolIsSignificant(true);
071    st.wordChars('*', '*');
072    st.parseNumbers();
073
074    SymbolList sym = null;
075
076   ALPHABET:
077    while( true ) {
078      int nt = st.nextToken();
079      if (nt == StreamTokenizer.TT_EOF) {
080          return;
081      } else if (nt == StreamTokenizer.TT_WORD) {
082          if(st.sval.startsWith("ALPHABET")) {
083            while(st.nextToken() != StreamTokenizer.TT_WORD) {}
084            sym = new SimpleSymbolList(symParser, st.sval);
085            break ALPHABET;
086          }
087      }
088    }
089
090    while(st.nextToken() != StreamTokenizer.TT_EOL) {}
091    while(st.nextToken() != StreamTokenizer.TT_EOL) {}
092
093   SEQLIST:
094    while( true ) {
095      if(st.nextToken() == StreamTokenizer.TT_WORD) {
096          if(st.sval != null && st.sval.startsWith("*"))
097            break SEQLIST;
098
099          //need this cause lines sometimes wrap!?
100          if(! st.sval.startsWith("Length"))
101           seqIDs.add(st.sval.intern());
102      }
103    }
104
105   OUTER:
106    while( true ) {
107      int width = 0;
108
109     FINDMOTIF:
110      while( true ) {
111        int nt = st.nextToken();
112        if (nt == StreamTokenizer.TT_EOF) {
113            break OUTER;
114        } else if (nt == StreamTokenizer.TT_WORD) {
115            if(st.sval.startsWith("MOTIF")) {
116              st.nextToken();                   // MOTIF x
117              while(st.nextToken() != StreamTokenizer.TT_NUMBER) {} // width = w
118              width = (int) st.nval;            // w
119              break FINDMOTIF;
120            }
121        }
122      }
123
124     FINDWEIGHTS:
125      while( true ) {
126        int nt = st.nextToken();
127        if (nt == StreamTokenizer.TT_EOF) {
128            break OUTER;
129        } else if (nt == StreamTokenizer.TT_WORD) {
130            if(st.sval.startsWith("letter")) {
131              while(st.nextToken() != StreamTokenizer.TT_EOL) {}
132              break FINDWEIGHTS;
133            }
134        }
135      }
136
137      SimpleWeightMatrix matrix = new SimpleWeightMatrix(
138        (FiniteAlphabet) symParser.getAlphabet(),
139        width,
140        DistributionFactory.DEFAULT
141      );
142
143      int r = 0;
144      int c = 0;
145     READMOTIF:
146      while( true ) {
147        int nt = st.nextToken();
148        if (nt == StreamTokenizer.TT_EOF) {
149            break OUTER;
150        } else if (nt == StreamTokenizer.TT_EOL) {
151            r = 0;
152            c++;
153            if(c == width)
154              break READMOTIF;
155        } else if (nt == StreamTokenizer.TT_NUMBER) {
156          try {
157            matrix.getColumn(c).setWeight(sym.symbolAt(r+1), st.nval);
158            r++;
159          } catch (ChangeVetoException cve) {
160            throw new BioError("Couldn't set up the distribution ",cve);
161          }
162        }
163      }
164
165      motifs.add(matrix);
166    }
167  }
168}