001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.bio.program; 022 023import java.io.BufferedReader; 024import java.io.FileInputStream; 025import java.io.InputStreamReader; 026 027import org.biojava.bio.program.sax.BlastLikeSAXParser; 028import org.biojava.bio.program.xml.SimpleXMLEmitter; 029import org.xml.sax.ContentHandler; 030import org.xml.sax.InputSource; 031import org.xml.sax.XMLReader; 032 033/** 034 * <p> 035 * A class that converts the raw output from a variety of bioinformatics 036 * software and converts it to XML that will validate against the 037 * biojava:BlastLikeDataSetCollection DTD. 038 * <p> 039 * For applications supported, please the documentation for the 040 * BlastLikeSAXParser. 041 * <p> 042 * Examination of the source code of this application also serves as 043 * demonstration of the simplicity of using the biojava blast-like SAX2 044 * parsing framework. The main functionality of the application is 045 * simply built from the following code, <i>viz.</i>: 046 * <pre> 047 * 048 * !** 049 * * The following code creates a parser for native output 050 * * from BlastLike programs. That is, 051 * * Create a SAX2 Parser that takes the native output 052 * * from blast-like bioinformatics software. 053 * *! 054 * <font color="#0000FF"> 055 * XMLReader oParser = 056 * (XMLReader) new BlastLikeSAXParser(); 057 * </font> 058 * !** 059 * * Namespace support controls the way in which 060 * * XML elements are reported. In XML, when an element 061 * * looks something like <biojava:Hit> then, 062 * * the part before the colon, i.e. biojava is the namespace, 063 * * and the part after the colon i.e. Hit is the Local name. 064 * * The full "biojava:Hit" name is termed the Qualified Name (QNames). 065 * * By default SAX2 parsers report Local Names, in this 066 * * example, we decided we wanted to make the parser report QNames. 067 * * 068 * * If you don't want to change default namespace support, you 069 * * can ignore the next piece of code. 070 * * 071 * * Dynamically change configuration of the parser 072 * * in regard of Namespace support. Here, 073 * * the xml.org/features/namespaces feature is simply "reset" 074 * * to its default value for SAX2. 075 * * The xml.org/features/namespaces-prefixes feature is 076 * * also set to true. This is to ensure that xmlns attributes 077 * * are reported by the parser. These are required because we want 078 * * to configure the XMLEmitter to output qualified names (see below). 079 * *! 080 * <font color="#0000FF"> 081 * try { 082 * oParser.setFeature("http://xml.org/sax/features/namespaces",true); 083 * oParser.setFeature( 084 * "http://xml.org/sax/features/namespace-prefixes",true); 085 * 086 * } catch (Exception e) { 087 * } 088 * </font> 089 * 090 * !** 091 * * Having selected the parser, we now want to 092 * * choose an object to deal with the SAX2 events 093 * * that the parser produces. This is the class 094 * * that you would normally write yourself to deal 095 * * with particular events you are interested in. 096 * * This class implements the ContentHandler - usually, 097 * * you would inherit from a SAX2 helper class that 098 * * implements this interface for you. 099 * * 100 * * Create an XML ContentHandler. This 101 * * implementation of the DocumentHandler 102 * * interface simply outputs nicely formatted 103 * * XML. Passing a true value to the SimpleXMLEmitter 104 * * constructor instructs the ContentHandler 105 * * to take QNames from the SAXParser, rather 106 * * than LocalNames. 107 * * 108 * <font color="#0000FF"> 109 * ContentHandler oHandler = 110 * (ContentHandler) new SimpleXMLEmitter(true); 111 * </font> 112 * 113 * !** 114 * * Now, link the Parser and the ContentHandler. 115 * * 116 * * Give the parser a reference to the ContentHandler 117 * * so that it can send SAX2 mesagges. 118 * *! 119 * <font color="#0000FF"> 120 * oParser.setContentHandler(oHandler); 121 * </font> 122 * !** 123 * * Finally, parse your Blast-like output. 124 * * 125 * * Now make the Parser parse the output from the 126 * * blast-like software and emit XML as specificed 127 * * by the ContentHandler. 128 * *! 129 * <font color="#0000FF"> 130 * oParser.parse(oInput); 131 * </font> 132 * </pre> 133 * 134 * <p> 135 * Copyright © 2000 Cambridge Antibody Technology. 136 * 137 * <p> 138 * Primary author -<ul> 139 * <li>Simon Brocklehurst (CAT) 140 * </ul> 141 * Other authors -<ul> 142 * <li>Tim Dilks (CAT) 143 * <li>Colin Hardman (CAT) 144 * <li>Stuart Johnston (CAT) 145 * <li>Mathieu Wiepert (Mayo Foundation) 146 *</ul> 147 * 148 * 149 * @author Cambridge Antibody Technology (CAT) 150 * @version 1.0 151 * 152 * @see BlastLikeSAXParser 153 * @see SimpleXMLEmitter 154 */ 155public class BlastLikeToXMLConverter { 156 157 private String oInput; 158 private XMLReader oParser; 159 private boolean tStrict = true; 160 161 /** 162 * Creates a new <code>BlastToXMLConverter</code> instance. 163 * 164 */ 165 public BlastLikeToXMLConverter(String poInput) { 166 oInput = poInput; 167 } 168 169 public void convert() throws java.io.IOException, 170 org.xml.sax.SAXException { 171 172 //Access functionality of biojava classes through 173 //standard org.xml.sax interfaces... 174 175 /** 176 * Create a SAX Parser that takes the native output 177 * from blast-like bioinformatics software. 178 */ 179 oParser = (XMLReader) new BlastLikeSAXParser(); 180 181 if (tStrict) { 182 ((BlastLikeSAXParser) oParser).setModeStrict(); 183 } else { 184 ((BlastLikeSAXParser) oParser).setModeLazy(); 185 } 186 /** 187 * Dynamically change configuration of the parser 188 * in regard of Namespace support. Here, 189 * the xml.org/sax/features/namespaces feature is simply "reset" 190 * to its default value for SAX2. 191 * The xml.org/sax/features/namespaces-prefixes feature is 192 * also set to true. This is to ensure that xmlns attributes 193 * are reported by the parser. These are required because we want 194 * to configure the XMLEmitter to output qualified names (see below). 195 */ 196 try { 197 oParser.setFeature("http://xml.org/sax/features/namespaces",true); 198 oParser.setFeature("http://xml.org/sax/features/namespace-prefixes", 199 true); 200 201 } catch (Exception e) { 202 //If an illegal conmbination of features is chosen, 203 //roll back to default settings. Output a warning, 204 //even though this might mess up the output... 205 System.out.println("WARNING: ignoring attempt to set illegal " + 206 "combination of parser features"); 207 System.out.println(e); 208 } 209 /** 210 * Create an XML ContentHandler. This 211 * implementation of the DocumentHandler 212 * interface simple outputs nicely formatted 213 * XML. Passing a true value to the SimpleXMLEmitter 214 * constructor instructs the ContentHandler 215 * to take QNames from the SAXParser, rather 216 * than LocalNames. 217 */ 218 ContentHandler oHandler = 219 (ContentHandler) new SimpleXMLEmitter(true); 220 221 /** 222 * Give the parser a reference to the ContentHandler 223 * so that it can send SAX2 mesagges. 224 */ 225 oParser.setContentHandler(oHandler); 226 /** 227 * Now make the Parser parse the output from the 228 * blast-like software and emit XML as specificed 229 * by the DocumentHandler. 230 */ 231 //Test direct specification of URI 232 //oParser.parse(oInput); 233 234 235 236 //Test direct specification of URI via InputSource 237 //oParser.parse(new InputSource(oInput)); 238 239 240 241 FileInputStream oInputFileStream; 242 BufferedReader oContents; 243 244 //Test parsing using ByteSteam as InputSource 245 // Open file and read all lines from file sequentially 246// try{ 247// oInputFileStream = new FileInputStream(oInput); 248// // create input stream 249 250// oParser.parse(new InputSource(oInputFileStream)); 251 252// } catch (java.io.FileNotFoundException x) { 253// System.out.println(x.getMessage()); 254// System.out.println("Couldn't open file"); 255// System.exit(0); 256// } 257 258 259 //Test parsing using CharacterStream as InputSource 260 // Open file and read all lines from file sequentially 261 try{ 262 oInputFileStream = new FileInputStream(oInput); 263 // create input stream 264 oContents = new 265 BufferedReader(new InputStreamReader(oInputFileStream)); 266 267 oParser.parse(new InputSource(oContents)); 268 269 } catch (java.io.FileNotFoundException x) { 270 System.out.println(x.getMessage()); 271 System.out.println("Couldn't open file"); 272 System.exit(0); 273 } 274 275 276 System.out.println(); 277 278 } 279 280 public void setModeStrict() { 281 tStrict = true; 282 } 283 public void setModeLazy() { 284 tStrict = false; 285 286 } 287 288 289}