001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 */ 020 021 022package org.biojava.bio.dist; 023 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.InputStreamReader; 027import java.util.ArrayList; 028import java.util.List; 029 030import javax.xml.parsers.ParserConfigurationException; 031import javax.xml.parsers.SAXParser; 032import javax.xml.parsers.SAXParserFactory; 033 034import org.biojava.bio.BioException; 035import org.biojava.bio.seq.io.SymbolTokenization; 036import org.biojava.bio.symbol.Alphabet; 037import org.biojava.bio.symbol.AlphabetManager; 038import org.biojava.bio.symbol.AtomicSymbol; 039import org.biojava.bio.symbol.IllegalAlphabetException; 040import org.biojava.bio.symbol.IllegalSymbolException; 041import org.biojava.bio.symbol.Symbol; 042import org.biojava.utils.ChangeVetoException; 043import org.xml.sax.Attributes; 044import org.xml.sax.InputSource; 045import org.xml.sax.SAXException; 046import org.xml.sax.helpers.DefaultHandler; 047 048 049/** 050 * A SAX parser that reads an XML representation of a 051 * Distribution from a file and recreates it as a Distribution Object. 052 * Handles OrderNDistributions and Simple Distributions but 053 * ensure the OrderNDistributions being read in was made using 054 * conditioning and conditioned Alphabets. 055 * 056 * @author Russell Smithies 057 * @author Mark Schreiber 058 * @author Matthew Pocock 059 * @since 1.3 060 */ 061public class XMLDistributionReader extends DefaultHandler { 062 //fixme: the handler and the user API seem to be muddled up - can the 063 // DefaultHandler impl be factored out into a static and hopefully private 064 // class? 065 private Alphabet alpha = null; 066 private Distribution dist = null; 067 private DistributionFactory fact = null; 068 //private OrderNDistributionFactory ondFact = null; 069 private SymbolTokenization nameParser = null; 070 private Symbol sym = null; 071 072 private Alphabet conditioningAlpha = null; 073 private AtomicSymbol conditioningSymbol = null; 074 private SymbolTokenization conditioningTok = null; 075 private Alphabet conditionedAlpha = null; 076 private AtomicSymbol conditionedSymbol = null; 077 private SymbolTokenization conditionedTok = null; 078 079 private Distribution getDist() { 080 return dist; 081 } //end getDist 082 083 /** 084 * Reads an XML representation of a Distribution from a file. 085 * 086 * @param is input in XML format 087 * @return dist the Distribution created. 088 * @throws IOException if an error occurs during reading. 089 * @throws SAXException if the XML is not as expected. 090 */ 091 public Distribution parseXML(InputStream is) throws IOException, SAXException{ 092 //org.xml.sax.XMLReader parser = new org.apache.xerces.parsers.SAXParser(); 093 SAXParserFactory fact = SAXParserFactory.newInstance(); 094 fact.setNamespaceAware(true); 095 try { 096 SAXParser parser = fact.newSAXParser(); 097 098// parser.setContentHandler(this); 099// 100// parser.setErrorHandler(this); 101 102 InputSource xml = null; 103 104 xml = new InputSource(new InputStreamReader(is)); 105 106 parser.parse(xml, this); 107 } 108 catch (ParserConfigurationException ex) { 109 //really shouldn't happen 110 throw new SAXException("Cannot make SAXParser",ex); 111 } 112 113 return this.getDist(); 114 } 115 116 /** 117 * Required by SAXParser to be public. 118 * 119 * <p> 120 * It is not reccomended that you use this method directly. Use ParseXML 121 * instead. 122 * </p> 123 */ 124 public void startElement(String nameSpaceURI, String localName, String rawName, Attributes attributes) 125 throws SAXException{ 126 if (localName.equals("Distribution") || localName.equals("OrderNDistribution")) { 127 processDistElement(attributes); 128 } else if (localName.equals("alphabet")) { 129 processAlphabetElement(attributes); 130 } else if (localName.equals("conditioning_symbol")) { 131 processConditioningSymbol(attributes); 132 } else if (localName.equals("weight")) { 133 processWeightElement(attributes); 134 } 135 } //end startElement 136 137 private void processConditioningSymbol(Attributes attr) 138 throws SAXException { 139 String name = attr.getValue("name"); 140 try { 141 conditioningSymbol = (AtomicSymbol)conditioningTok.parseToken(name); 142 } 143 catch (IllegalSymbolException ex) { 144 throw new SAXException(ex); 145 } 146 147 } 148 149 private void processWeightElement(Attributes attr) 150 throws SAXException{ 151 double weight = 0.0; 152 153 try { 154 //get the weight of the symbol 155 weight = Double.parseDouble(attr.getValue("prob")); 156 } catch (NumberFormatException ex) { 157 //catches the "NAN" string 158 weight = 0.0; 159 } 160 161 //add counts if SIMPLE DISTRIBUTION 162 if ((dist instanceof OrderNDistribution) == false) { 163 try { 164 //initialize the tokenizer 165 nameParser = dist.getAlphabet().getTokenization("name"); 166 } catch (BioException ex) { 167 throw new SAXException("Couldn't get tokenization for " 168 +dist.getAlphabet().getName(), ex); 169 } 170 171 try { 172 //get the symbol name 173 sym = nameParser.parseToken(attr.getValue("sym")); 174 175 176 //add count to dist 177 dist.setWeight(sym, weight); 178 } catch (IllegalSymbolException ex) { 179 throw new SAXException("Illegal symbol found", ex); 180 } catch (ChangeVetoException ex) { 181 throw new SAXException("Distribution has been locked, possible synchronization problem !?",ex); 182 } 183 184 //add countf if ORDER N DISTRIBUTION 185 } else if (dist instanceof OrderNDistribution) { 186 187 188 //get the weight for symbol 189 try { 190 //get the weight of the symbol 191 weight = Double.parseDouble(attr.getValue("prob")); 192 } catch (NumberFormatException ex) { 193 weight = 0.0; 194 } 195 196 //rebuild the symbol from the conditioning and conditioned symbol 197 String name = attr.getValue("sym"); 198 try { 199 conditionedSymbol = (AtomicSymbol)conditionedTok.parseToken(name); 200 } 201 catch (IllegalSymbolException ex) { 202 throw new SAXException(ex); 203 } 204 List l = new ArrayList(); 205 l.add(conditioningSymbol); 206 l.add(conditionedSymbol); 207 208 209 try { 210 sym = alpha.getSymbol(l); 211 //set weights on distribution 212 dist.setWeight(sym, weight); 213 } catch (IllegalSymbolException ex) { 214 throw new SAXException("Illegal symbol found", ex); 215 } catch (ChangeVetoException ex) { 216 throw new SAXException("Distribution has been locked, possible synchronization problem !?",ex); 217 } 218 } 219 } //end processWeight 220 221 private void processAlphabetElement(Attributes attr) 222 throws SAXException { 223 String alphaName = attr.getValue("name"); 224 225 226 //get Alphabet 227 alpha = AlphabetManager.alphabetForName(alphaName); 228 229 //make the Distribution 230 try { 231 dist = fact.createDistribution(alpha); 232 if(dist instanceof OrderNDistribution){ 233 conditionedAlpha = ((OrderNDistribution)dist).getConditionedAlphabet(); 234 conditionedTok = conditionedAlpha.getTokenization("name"); 235 236 conditioningAlpha = ((OrderNDistribution)dist).getConditioningAlphabet(); 237 conditioningTok = conditioningAlpha.getTokenization("name"); 238 } 239 240 } catch (IllegalAlphabetException ex) { 241 throw new SAXException(ex); 242 } catch (BioException ex) { 243 throw new SAXException(ex); 244 } 245 } //end processAlphabetElement 246 247 private void processDistElement(Attributes attr) throws SAXException { 248 if (attr.getValue("type").equals("Distribution")) { 249 fact = DistributionFactory.DEFAULT; 250 } else if (attr.getValue("type").equals("OrderNDistribution")) { 251 fact = OrderNDistributionFactory.DEFAULT; 252 } else { 253 throw new SAXException("Element must be a distribution"); 254 } 255 } //end processDistElement 256 257}