001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 */
020
021
022package org.biojava.bio.dist;
023
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.util.ArrayList;
028import java.util.List;
029
030import javax.xml.parsers.ParserConfigurationException;
031import javax.xml.parsers.SAXParser;
032import javax.xml.parsers.SAXParserFactory;
033
034import org.biojava.bio.BioException;
035import org.biojava.bio.seq.io.SymbolTokenization;
036import org.biojava.bio.symbol.Alphabet;
037import org.biojava.bio.symbol.AlphabetManager;
038import org.biojava.bio.symbol.AtomicSymbol;
039import org.biojava.bio.symbol.IllegalAlphabetException;
040import org.biojava.bio.symbol.IllegalSymbolException;
041import org.biojava.bio.symbol.Symbol;
042import org.biojava.utils.ChangeVetoException;
043import org.xml.sax.Attributes;
044import org.xml.sax.InputSource;
045import org.xml.sax.SAXException;
046import org.xml.sax.helpers.DefaultHandler;
047
048
049/**
050 * A SAX parser that reads an XML representation of a
051 * Distribution from a file and recreates it as a Distribution Object.
052 * Handles OrderNDistributions and Simple Distributions but
053 * ensure the OrderNDistributions being read in was made using
054 * conditioning and conditioned Alphabets.
055 *
056 * @author Russell Smithies
057 * @author Mark Schreiber
058 * @author Matthew Pocock
059 * @since 1.3
060 */
061public class XMLDistributionReader extends DefaultHandler {
062  //fixme: the handler and the user API seem to be muddled up - can the
063  // DefaultHandler impl be factored out into a static and hopefully private
064  // class?
065    private Alphabet alpha = null;
066    private Distribution dist = null;
067    private DistributionFactory fact = null;
068    //private OrderNDistributionFactory ondFact = null;
069    private SymbolTokenization nameParser = null;
070    private Symbol sym = null;
071
072    private Alphabet conditioningAlpha = null;
073    private AtomicSymbol conditioningSymbol = null;
074    private SymbolTokenization conditioningTok = null;
075    private Alphabet conditionedAlpha = null;
076    private AtomicSymbol conditionedSymbol = null;
077    private SymbolTokenization conditionedTok = null;
078
079    private Distribution getDist() {
080        return dist;
081    } //end getDist
082
083    /**
084     * Reads an XML representation of a Distribution from a file.
085     *
086     * @param is input in XML format
087     * @return dist the Distribution created.
088     * @throws IOException if an error occurs during reading.
089     * @throws SAXException if the XML is not as expected.
090     */
091    public Distribution parseXML(InputStream is) throws IOException, SAXException{
092        //org.xml.sax.XMLReader parser = new org.apache.xerces.parsers.SAXParser();
093        SAXParserFactory fact = SAXParserFactory.newInstance();
094        fact.setNamespaceAware(true);
095        try {
096          SAXParser parser = fact.newSAXParser();
097
098//          parser.setContentHandler(this);
099//
100//          parser.setErrorHandler(this);
101
102          InputSource xml = null;
103
104          xml = new InputSource(new InputStreamReader(is));
105
106          parser.parse(xml, this);
107        }
108        catch (ParserConfigurationException ex) {
109         //really shouldn't happen
110         throw new SAXException("Cannot make SAXParser",ex);
111        }
112
113        return this.getDist();
114    }
115
116    /**
117     * Required by SAXParser to be public.
118     *
119     * <p>
120     * It is not reccomended that you use this method directly. Use ParseXML
121     * instead.
122     * </p>
123     */
124    public void startElement(String nameSpaceURI, String localName, String rawName, Attributes attributes)
125                      throws SAXException{
126        if (localName.equals("Distribution") || localName.equals("OrderNDistribution")) {
127            processDistElement(attributes);
128        } else if (localName.equals("alphabet")) {
129            processAlphabetElement(attributes);
130        } else if (localName.equals("conditioning_symbol")) {
131            processConditioningSymbol(attributes);
132        } else if (localName.equals("weight")) {
133            processWeightElement(attributes);
134        }
135    } //end startElement
136
137    private void processConditioningSymbol(Attributes attr)
138                                    throws SAXException {
139        String name = attr.getValue("name");
140        try {
141          conditioningSymbol = (AtomicSymbol)conditioningTok.parseToken(name);
142        }
143        catch (IllegalSymbolException ex) {
144          throw new SAXException(ex);
145        }
146
147    }
148
149    private void processWeightElement(Attributes attr)
150                               throws SAXException{
151        double weight = 0.0;
152
153        try {
154            //get the weight of the symbol
155            weight = Double.parseDouble(attr.getValue("prob"));
156        } catch (NumberFormatException ex) {
157            //catches the "NAN" string
158            weight = 0.0;
159        }
160
161        //add counts if SIMPLE DISTRIBUTION
162        if ((dist instanceof OrderNDistribution) == false) {
163            try {
164                //initialize the tokenizer
165                nameParser = dist.getAlphabet().getTokenization("name");
166            } catch (BioException ex) {
167                throw new SAXException("Couldn't get tokenization for "
168                                       +dist.getAlphabet().getName(), ex);
169            }
170
171            try {
172                //get the symbol name
173                sym = nameParser.parseToken(attr.getValue("sym"));
174
175
176                //add count to dist
177                dist.setWeight(sym, weight);
178            } catch (IllegalSymbolException ex) {
179                throw new SAXException("Illegal symbol found", ex);
180            } catch (ChangeVetoException ex) {
181                throw new SAXException("Distribution has been locked, possible synchronization problem !?",ex);
182            }
183
184            //add countf if ORDER N DISTRIBUTION
185        } else if (dist instanceof OrderNDistribution) {
186
187
188            //get the weight for symbol
189            try {
190                //get the weight of the symbol
191                weight = Double.parseDouble(attr.getValue("prob"));
192            } catch (NumberFormatException ex) {
193                weight = 0.0;
194            }
195
196            //rebuild the symbol from the conditioning and conditioned symbol
197            String name = attr.getValue("sym");
198            try {
199              conditionedSymbol = (AtomicSymbol)conditionedTok.parseToken(name);
200            }
201            catch (IllegalSymbolException ex) {
202              throw new SAXException(ex);
203            }
204            List l = new ArrayList();
205            l.add(conditioningSymbol);
206            l.add(conditionedSymbol);
207
208
209            try {
210                sym = alpha.getSymbol(l);
211                //set weights on distribution
212                dist.setWeight(sym, weight);
213            } catch (IllegalSymbolException ex) {
214                throw new SAXException("Illegal symbol found", ex);
215            } catch (ChangeVetoException ex) {
216                throw new SAXException("Distribution has been locked, possible synchronization problem !?",ex);
217            }
218        }
219    } //end processWeight
220
221    private void processAlphabetElement(Attributes attr)
222                                 throws SAXException {
223        String alphaName = attr.getValue("name");
224
225
226        //get Alphabet
227        alpha = AlphabetManager.alphabetForName(alphaName);
228
229        //make the Distribution
230        try {
231            dist = fact.createDistribution(alpha);
232            if(dist instanceof OrderNDistribution){
233              conditionedAlpha = ((OrderNDistribution)dist).getConditionedAlphabet();
234              conditionedTok = conditionedAlpha.getTokenization("name");
235
236              conditioningAlpha = ((OrderNDistribution)dist).getConditioningAlphabet();
237              conditioningTok = conditioningAlpha.getTokenization("name");
238            }
239
240        } catch (IllegalAlphabetException ex) {
241            throw new SAXException(ex);
242        } catch (BioException ex) {
243            throw new SAXException(ex);
244        }
245    } //end processAlphabetElement
246
247    private void processDistElement(Attributes attr) throws SAXException {
248        if (attr.getValue("type").equals("Distribution")) {
249            fact = DistributionFactory.DEFAULT;
250        } else if (attr.getValue("type").equals("OrderNDistribution")) {
251            fact = OrderNDistributionFactory.DEFAULT;
252        } else {
253            throw new SAXException("Element must be a distribution");
254        }
255    } //end processDistElement
256
257}