001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.bio.program;
022
023import java.io.BufferedReader;
024import java.io.FileInputStream;
025import java.io.InputStreamReader;
026
027import org.biojava.bio.program.sax.BlastLikeSAXParser;
028import org.biojava.bio.program.xml.SimpleXMLEmitter;
029import org.xml.sax.ContentHandler;
030import org.xml.sax.InputSource;
031import org.xml.sax.XMLReader;
032
033/**
 * <p>
 * A class that converts the raw output from a variety of bioinformatics
 * software into XML that will validate against the
 * biojava:BlastLikeDataSetCollection DTD.
038 * <p>
 * For the applications supported, please see the documentation for the
 * BlastLikeSAXParser.
041 * <p>
042 * Examination of the source code of this application also serves as
043 * demonstration of the simplicity of using the biojava blast-like SAX2 
044 * parsing framework.  The main functionality of the application is
045 * simply built from the following code, <i>viz.</i>:
046 * <pre>
047 *
048 *      !**
049 *       * The following code creates a parser for native output
050 *       * from BlastLike programs. That is,
051 *       * Create a SAX2 Parser that takes the native output
052 *       * from blast-like bioinformatics software.
053 *       *!
054 *       <font color="#0000FF">
055 *        XMLReader oParser =
056 *       (XMLReader) new BlastLikeSAXParser();
057 *       </font>
058 *     !**
059 *       * Namespace support controls the way in which
060 *       * XML elements are reported. In XML, when an element
 *       * looks something like &lt;biojava:Hit&gt; then,
062 *       * the part before the colon, i.e. biojava is the namespace,
063 *       * and the part after the colon i.e. Hit is the Local name.
064 *       * The full "biojava:Hit" name is termed the Qualified Name (QNames).
065 *       * By default SAX2 parsers report Local Names, in this
066 *       * example, we decided we wanted to make the parser report QNames.
067 *       *
068 *       * If you don't want to change default namespace support, you
069 *       * can ignore the next piece of code.
070 *       *
071 *       * Dynamically change configuration of the parser
072 *       * in regard of Namespace support. Here,
 *       * the xml.org/sax/features/namespaces feature is simply "reset"
 *       * to its default value for SAX2.
 *       * The xml.org/sax/features/namespace-prefixes feature is
076 *       * also set to true.  This is to ensure that xmlns attributes
077 *       * are reported by the parser. These are required because we want
078 *       * to configure the XMLEmitter to output qualified names (see below).
079 *       *!
080 *      <font color="#0000FF">
081 *      try {
082 *      oParser.setFeature("http://xml.org/sax/features/namespaces",true);
083 *      oParser.setFeature(
084 *              "http://xml.org/sax/features/namespace-prefixes",true);
085 *
086 *      } catch (Exception e) {
087 *      }
088 *      </font>
089 *
090 *      !**
091 *       * Having selected the parser, we now want to
092 *       * choose an object to deal with the SAX2 events
093 *       * that the parser produces. This is the class
094 *       * that you would normally write yourself to deal
095 *       * with particular events you are interested in.
096 *       * This class implements the ContentHandler - usually,
097 *       * you would inherit from a SAX2 helper class that
098 *       * implements this interface for you.
099 *       *
 *       * Create an XML ContentHandler. This
 *       * implementation of the ContentHandler
 *       * interface simply outputs nicely formatted
 *       * XML. Passing a true value to the SimpleXMLEmitter
 *       * constructor instructs the ContentHandler
 *       * to take QNames from the SAXParser, rather
 *       * than LocalNames.
 *       *!
108 *      <font color="#0000FF">
109 *      ContentHandler oHandler  = 
110 *      (ContentHandler) new SimpleXMLEmitter(true);
111 *      </font>
112 *
113 *      !**
114 *       * Now, link the Parser and the ContentHandler.
115 *       *
116 *       * Give the parser a reference to the ContentHandler
 *       * so that it can send SAX2 messages.
118 *       *!
119 *      <font color="#0000FF">
120 *      oParser.setContentHandler(oHandler);
121 *      </font>
122 *      !**
123 *       * Finally, parse your Blast-like output.
124 *       *
125 *       * Now make the Parser parse the output from the
 *       * blast-like software and emit XML as specified
127 *       * by the ContentHandler.
128 *       *!
129 *      <font color="#0000FF">
130 *      oParser.parse(oInput);  
131 *      </font>
132 * </pre>
133 *
134 * <p>
135 * Copyright &copy; 2000 Cambridge Antibody Technology.
136 *
137 * <p>
138 * Primary author -<ul>
139 * <li>Simon Brocklehurst (CAT)
140 * </ul>
141 * Other authors  -<ul>
142 * <li>Tim Dilks          (CAT)
143 * <li>Colin Hardman      (CAT)
144 * <li>Stuart Johnston    (CAT)
145 * <li>Mathieu Wiepert    (Mayo Foundation)
146 *</ul>
147 *
148 *
149 * @author Cambridge Antibody Technology (CAT)
150 * @version 1.0
151 * 
152 * @see BlastLikeSAXParser
153 * @see SimpleXMLEmitter
154 */
155public class BlastLikeToXMLConverter {
156
157    private String            oInput;
158    private XMLReader         oParser;
159    private boolean           tStrict         = true;
160
161    /**
162     * Creates a new <code>BlastToXMLConverter</code> instance.
163     *
164     */
165    public BlastLikeToXMLConverter(String poInput) {
166    oInput = poInput;
167    }
168
169    public void convert() throws java.io.IOException,
170                                 org.xml.sax.SAXException {
171
172    //Access functionality of biojava classes through
173    //standard org.xml.sax interfaces...
174
175    /**
176     * Create a SAX Parser that takes the native output
177     * from blast-like bioinformatics software.
178     */
179    oParser = (XMLReader) new BlastLikeSAXParser();
180
181    if (tStrict) {
182        ((BlastLikeSAXParser) oParser).setModeStrict();
183    } else {
184        ((BlastLikeSAXParser) oParser).setModeLazy();
185    }
186    /**
187     * Dynamically change configuration of the parser
188     * in regard of Namespace support. Here,
189     * the xml.org/sax/features/namespaces feature is simply "reset"
190     * to its default value for SAX2.
191     * The xml.org/sax/features/namespaces-prefixes feature is
192     * also set to true.  This is to ensure that xmlns attributes
193     * are reported by the parser. These are required because we want
194     * to configure the XMLEmitter to output qualified names (see below).
195     */
196    try {
197        oParser.setFeature("http://xml.org/sax/features/namespaces",true);
198        oParser.setFeature("http://xml.org/sax/features/namespace-prefixes",
199                   true);
200
201    } catch (Exception e) {
202        //If an illegal conmbination of features is chosen,
203        //roll back to default settings. Output a warning,
204        //even though this might mess up the output...
205        System.out.println("WARNING: ignoring attempt to set illegal " +
206                   "combination of parser features");
207        System.out.println(e);
208    }
209    /**
210     * Create an XML ContentHandler. This
211     * implementation of the DocumentHandler
212     * interface simple outputs nicely formatted
213     * XML. Passing a true value to the SimpleXMLEmitter
214     * constructor instructs the ContentHandler
215     * to take QNames from the SAXParser, rather
216     * than LocalNames.
217     */
218    ContentHandler oHandler  = 
219        (ContentHandler) new SimpleXMLEmitter(true);
220
221    /**
222     * Give the parser a reference to the ContentHandler
223     * so that it can send SAX2 mesagges.
224     */
225    oParser.setContentHandler(oHandler);
226    /**
227     * Now make the Parser parse the output from the
228     * blast-like software and emit XML as specificed
229     * by the DocumentHandler.
230     */
231    //Test direct specification of URI
232    //oParser.parse(oInput);  
233
234    
235    
236    //Test direct specification of URI via InputSource
237    //oParser.parse(new InputSource(oInput));  
238
239
240
241    FileInputStream           oInputFileStream;
242    BufferedReader            oContents;
243
244    //Test parsing using ByteSteam as InputSource
245        // Open file and read all lines from file sequentially
246//         try{
247//             oInputFileStream = new FileInputStream(oInput);
248//             // create input stream
249
250//      oParser.parse(new InputSource(oInputFileStream));
251
252//         } catch (java.io.FileNotFoundException x) {
253//             System.out.println(x.getMessage());
254//             System.out.println("Couldn't open file");
255//             System.exit(0);
256//         }
257
258
259    //Test parsing using CharacterStream as InputSource
260        // Open file and read all lines from file sequentially
261        try{
262             oInputFileStream = new FileInputStream(oInput);
263             // create input stream
264             oContents = new
265                 BufferedReader(new InputStreamReader(oInputFileStream));
266
267        oParser.parse(new InputSource(oContents));
268
269         } catch (java.io.FileNotFoundException x) {
270             System.out.println(x.getMessage());
271             System.out.println("Couldn't open file");
272             System.exit(0);
273     }
274
275
276    System.out.println();
277    
278    }
279
280    public void setModeStrict() {
281    tStrict = true;
282    }
283    public void setModeLazy() {
284    tStrict = false;
285
286    }
287
288
289}