/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


package org.biojava.bio.program;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.List;

import org.biojava.bio.BioError;
import org.biojava.bio.dist.DistributionFactory;
import org.biojava.bio.dp.SimpleWeightMatrix;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.SimpleSymbolList;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.utils.ChangeVetoException;

/**
 * The results of a meme run.
 *
 * <p>
 * An instance is populated once, by the constructor, from a stream of meme
 * output. It collects the sequence identifiers listed in the output and one
 * {@link SimpleWeightMatrix} per motif whose weights could be read in full.
 * </p>
 *
 * @author Matthew Pocock
 */
public class Meme {
  /** The motifs read from the stream, as SimpleWeightMatrix instances. */
  private final List motifs = new ArrayList();

  /** The sequence identifiers read from the stream, as interned Strings. */
  private final List seqIDs = new ArrayList();

  /**
   * Retrieve the motifs that were parsed.
   *
   * @return the live internal List of SimpleWeightMatrix objects, in the
   *         order they were read
   */
  public List getMotifs() {
    return motifs;
  }

  /**
   * Retrieve the sequence identifiers that were parsed.
   *
   * @return the live internal List of interned String identifiers, in the
   *         order they were read
   */
  public List getSeqIDs() {
    return seqIDs;
  }

  /**
   * Parse meme output from a stream.
   *
   * <p>
   * The parser looks for a word starting with <code>ALPHABET</code> and takes
   * the next word as the ordered list of symbols, skips to the end of the
   * following line, then records every word that does not start with
   * <code>Length</code> as a sequence ID until it meets a word starting with
   * <code>*</code>. After that it repeatedly looks for a word starting with
   * <code>MOTIF</code>, reads the motif width (the first number after the
   * token that follows <code>MOTIF</code>), finds the line containing a word
   * starting with <code>letter</code>, and reads one line of weights per
   * motif column.
   * </p>
   *
   * <p>
   * If the stream ends before a section is complete, parsing stops quietly
   * and whatever was fully read so far is kept. A motif whose weights are
   * cut short is not added. The stream is not closed by this constructor.
   * </p>
   *
   * @param is         the stream to read the meme output from
   * @param symParser  the tokenization used to parse the alphabet line and
   *                   whose alphabet the weight matrices are built over
   * @throws IOException  if reading from the stream fails
   * @throws IllegalSymbolException  if the alphabet line contains a symbol
   *         that symParser can not parse, or a weight is set for a symbol
   *         the matrix does not accept
   * @throws IllegalAlphabetException  if a weight matrix can not be built
   *         over the alphabet of symParser
   */
  public Meme(InputStream is, SymbolTokenization symParser)
         throws IOException, IllegalSymbolException, IllegalAlphabetException {
    StreamTokenizer st = new StreamTokenizer(
      new BufferedReader(new InputStreamReader(is)));
    st.eolIsSignificant(true);
    // treat '*' as a word character so that the "****" separator lines
    // are reported as words
    st.wordChars('*', '*');
    st.parseNumbers();

    // the word after ALPHABET lists the symbols in matrix row order
    if (!skipToWord(st, "ALPHABET")
        || !skipToToken(st, StreamTokenizer.TT_WORD)) {
      return;
    }
    SymbolList sym = new SimpleSymbolList(symParser, st.sval);

    // skip the rest of the alphabet line and the whole of the next line
    if (!skipToToken(st, StreamTokenizer.TT_EOL)
        || !skipToToken(st, StreamTokenizer.TT_EOL)) {
      return;
    }

    // sequence IDs run up to the next word starting with '*'
    while (true) {
      int tt = st.nextToken();
      if (tt == StreamTokenizer.TT_EOF) {
        // truncated input: without this check the loop would never end
        return;
      }
      if (tt != StreamTokenizer.TT_WORD) {
        continue;
      }
      if (st.sval.startsWith("*")) {
        break;
      }
      //need this cause lines sometimes wrap!?
      if (!st.sval.startsWith("Length")) {
        seqIDs.add(st.sval.intern());
      }
    }

    // one pass of this loop per motif; any end-of-file ends the parse
    while (true) {
      if (!skipToWord(st, "MOTIF")) {
        break;
      }
      st.nextToken(); // MOTIF x - discard the token that follows MOTIF
      if (!skipToToken(st, StreamTokenizer.TT_NUMBER)) { // width = w
        break;
      }
      int width = (int) st.nval; // w

      // weights start on the line after the one containing "letter..."
      if (!skipToWord(st, "letter")
          || !skipToToken(st, StreamTokenizer.TT_EOL)) {
        break;
      }

      SimpleWeightMatrix matrix = new SimpleWeightMatrix(
        (FiniteAlphabet) symParser.getAlphabet(),
        width,
        DistributionFactory.DEFAULT
      );

      if (!readWeights(st, matrix, sym, width)) {
        break;
      }

      motifs.add(matrix);
    }
  }

  /**
   * Consume tokens until one of the given type has been read.
   *
   * @param st      the tokenizer to advance
   * @param wanted  the token type to stop at, as returned by nextToken()
   * @return true if a token of the wanted type was read, false if the end
   *         of the stream was reached first
   * @throws IOException  if reading from the stream fails
   */
  private static boolean skipToToken(StreamTokenizer st, int wanted)
         throws IOException {
    while (true) {
      int tt = st.nextToken();
      if (tt == wanted) {
        return true;
      }
      if (tt == StreamTokenizer.TT_EOF) {
        // nextToken() keeps returning TT_EOF, so give up rather than spin
        return false;
      }
    }
  }

  /**
   * Consume tokens until a word starting with the given prefix has been read.
   *
   * @param st      the tokenizer to advance
   * @param prefix  the text the word must start with
   * @return true if such a word was read (it is left in st.sval), false if
   *         the end of the stream was reached first
   * @throws IOException  if reading from the stream fails
   */
  private static boolean skipToWord(StreamTokenizer st, String prefix)
         throws IOException {
    while (true) {
      int tt = st.nextToken();
      if (tt == StreamTokenizer.TT_EOF) {
        return false;
      }
      if (tt == StreamTokenizer.TT_WORD && st.sval.startsWith(prefix)) {
        return true;
      }
    }
  }

  /**
   * Read the weights of one motif, one line per matrix column.
   *
   * <p>
   * The n'th number on a line is stored as the weight of the n'th symbol of
   * sym in the column for that line. Reading stops once width lines have
   * been ended.
   * </p>
   *
   * @param st      the tokenizer, positioned at the start of the first line
   *                of weights
   * @param matrix  the matrix to fill in
   * @param sym     the symbols, in the order their weights appear on a line
   * @param width   the number of lines (columns of the matrix) to read
   * @return true if all width lines were read, false if the end of the
   *         stream was reached first
   * @throws IOException  if reading from the stream fails
   * @throws IllegalSymbolException  if a column rejects a symbol of sym
   */
  private static boolean readWeights(StreamTokenizer st,
                                     SimpleWeightMatrix matrix,
                                     SymbolList sym,
                                     int width)
         throws IOException, IllegalSymbolException {
    int row = 0;
    int col = 0;
    while (true) {
      int tt = st.nextToken();
      if (tt == StreamTokenizer.TT_EOF) {
        return false;
      } else if (tt == StreamTokenizer.TT_EOL) {
        // end of a line of weights: move on to the next column
        row = 0;
        col++;
        if (col == width) {
          return true;
        }
      } else if (tt == StreamTokenizer.TT_NUMBER) {
        try {
          // SymbolList positions are 1-based
          matrix.getColumn(col).setWeight(sym.symbolAt(row + 1), st.nval);
          row++;
        } catch (ChangeVetoException cve) {
          throw new BioError("Couldn't set up the distribution ",cve);
        }
      }
    }
  }
}