001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.bio.program.blast2html;
022
023import java.io.BufferedReader;
024import java.io.IOException;
025import java.io.StringReader;
026
027import org.xml.sax.Attributes;
028import org.xml.sax.SAXException;
029import org.xml.sax.helpers.DefaultHandler;
030
031/**
032 * Takes a SAX event stream and a HTMLRenderer to produce
033 * a HTML Blast like program report.
034 *
035 *
036 * Primary author -
037 *                 Colin Hardman      (CAT)
038 * Other authors  -
039 *                 Tim Dilks          (CAT)
040 *                 Simon Brocklehurst (CAT)
041 *                 Stuart Johnston    (CAT)
042 *                 Lawerence Bower    (CAT)
043 *                 Derek Crockford    (CAT)
044 *                 Neil Benn          (CAT)
045 *
046 * Copyright 2001 Cambridge Antibody Technology Group plc.
047 *
048 * This code released to the biojava project, May 2001
049 * under the LGPL license.
050 *
051 * @author Cambridge Antibody Technology Group plc
052 * @author Greg Cox
053 * @version 1.0
054 */
055public class Blast2HTMLHandler extends DefaultHandler  {
056
057    /**
058     * Variables to hold data while parsing.
059     *
060     */
061    private StringBuffer sb = new StringBuffer();
062
063    private String oProgram;
064    private String oVersion;
065    private String oQuery;
066    private String oDatabase;
067
068    private HitSummary oHitSummary = new HitSummary();
069    private HitId      oHitId      = new HitId();
070    private HitDescription oDesc   = new HitDescription();
071    {
072        oHitSummary.oHitId = oHitId;
073        oHitSummary.oDesc  = oDesc;
074    }
075
076    private DetailHit oDetailHit = new DetailHit();
077    private HSP       oHSP       = new HSP();
078    private HSPSummary oHSPSummary = new HSPSummary();
079    private BlastLikeAlignment oAlignment = new BlastLikeAlignment();
080    private Sequence oQuerySeq = new Sequence();
081    private Sequence oHitSeq = new Sequence();
082
083    {
084        oDetailHit.oHitId = oHitId;
085        oDetailHit.oDesc  = oDesc;
086        oHSP.oHSPSummary = oHSPSummary;
087        oHSP.oAlignment   = oAlignment;
088        oAlignment.oQuerySeq = oQuerySeq;
089        oAlignment.oHitSeq = oHitSeq;
090
091    }
092
093    private String oRawOutput = null;
094
095    // Flow control flags
096    private boolean inCollection = false;
097    private boolean firstSummary = true;
098    private boolean firstDetail = true;
099
100    /**
101     * The Class to render the HTML
102     */
103    private HTMLRenderer oRenderer = null;
104
105
106
107    /**
108     * A content handler for rendering blast like outputs into
109     * HTML.
110     *
111     * @param poRenderer <code>HTMLRenderer</code> - a configured
112     *                   HTMLRenderer.
113     */
114    public Blast2HTMLHandler( HTMLRenderer poRenderer ) {
115
116        if ( poRenderer == null ) {
117            throw new IllegalArgumentException
118                ( "HTMLRenderer cannot be null" );
119        }
120        oRenderer = poRenderer;
121    }
122
123
124
125    // ************************************************************ //
126    // ****            ContentHandler overrides                **** //
127    // ************************************************************ //
128
129    /**
130     * This is called when an element is entered. That is,
131     * the parser has met the first tag of the tag pair.
132     *
133     * @param poNameSpace <code>String</code> - the name space.
134     * @param poElementName <code>String</code> - the local name of the tag.
135     * @param poQName  <code>String</code> - the fully qualified name
136     *                                       with prefix
137     * @param poAtts an <code>Attributes</code> - the tag attributes.
138     * @exception SAXException if an error occurs
139     */
140    public void startElement ( String poNameSpace, String poElementName,
141                               String poQName, Attributes poAtts)
142        throws SAXException {
143
144        if ( poElementName.equals( "BlastLikeDataSetCollection" ) ) {
145            inCollection = true; // only checking to do - assume once
146                                 // inside collection it follows DTD
147        }
148
149        if ( inCollection ) {
150
151            if ( poElementName.equals( "BlastLikeDataSet" ) ) {
152
153                oProgram = poAtts.getValue( "program" );
154                oVersion = poAtts.getValue( "version" );
155                sb.setLength( 0 );
156            } else if ( poElementName.equals( "HitSummary" ) ) {
157
158                oHitSummary.score       = poAtts.getValue( "score" );
159                oHitSummary.expectValue = poAtts.getValue( "expectValue" );
160
161                oHitSummary.numberOfHSPs = poAtts.getValue( "numberOfHSPs" );
162                oHitSummary.numberOfContributingHSPs = poAtts.getValue
163                    ( "numberOfContributingHSPs" );
164                oHitSummary.smallestSumProbability = poAtts.getValue
165                    ( "smallestSumProbability" );
166                oHitSummary.readingFrame = poAtts.getValue( "readingFrame" );
167                oHitSummary.numberOfDomains = poAtts.getValue
168                    ( "numberOfDomains" );
169
170                if ( firstSummary ) {
171                    oRenderer.startSummaryTable( oHitSummary );
172                    firstSummary = false;
173                }
174
175                //
176                // WHAT happens if there is more than one HSPCollection
177                // per hit - probably won't work.
178                //
179                //
180            } else if ( poElementName.equals( "HSPCollection" ) ) {
181                oRenderer.writeCurrentDetail( oDetailHit );
182            } else if ( poElementName.equals( "Hit" ) ) {
183
184                if ( firstDetail ) {
185                    oRenderer.startDetailTable();
186                    firstDetail = false;
187                }
188
189                oDetailHit.sequenceLength = poAtts.getValue
190                    ( "sequenceLength" );
191
192            } else if ( poElementName.equals( "HSPSummary" ) ) {
193
194                oHSPSummary.percentageIdentity  = poAtts.getValue
195                    ( "percentageIdentity" );
196                oHSPSummary.score       = poAtts.getValue( "score" );
197                oHSPSummary.expectValue = poAtts.getValue( "expectValue" );
198                oHSPSummary.alignmentSize  = poAtts.getValue
199                    ( "alignmentSize" );
200                oHSPSummary.numberOfIdentities = poAtts.getValue
201                    ( "numberOfIdentities" );
202
203                oHSPSummary.hitStrand       = poAtts.getValue( "hitStrand" );
204                oHSPSummary.queryStrand     = poAtts.getValue( "queryStrand" );
205                oHSPSummary.queryFrame      = poAtts.getValue( "queryFrame" );
206                oHSPSummary.hitFrame        = poAtts.getValue( "hitFrame" );
207
208                oHSPSummary.numberOfPositives  = poAtts.getValue
209                    ( "numberOfPositives" );
210                oHSPSummary.percentagePositives= poAtts.getValue
211                    ( "percentagePositives" );
212                oHSPSummary.pValue          = poAtts.getValue( "pValue" );
213                oHSPSummary.sumPValues      = poAtts.getValue( "sumPValues" );
214                oHSPSummary.numberOfGaps    = poAtts.getValue( "numberOfGaps");
215
216
217            } else if ( poElementName.equals( "HitId" ) ) {
218                oHitId.id = poAtts.getValue( "id" );
219                oHitId.metaData = poAtts.getValue( "metaData" );
220
221            } else if ( poElementName.equals( "HitDescription" ) ) {
222                sb.setLength( 0 );
223            } else if ( poElementName.equals( "QuerySequence" ) ) {
224                oAlignment.oQuerySeq.startPosition =
225                    poAtts.getValue( "startPosition" );
226                oAlignment.oQuerySeq.stopPosition  =
227                    poAtts.getValue( "stopPosition" );
228                sb.setLength( 0 );
229            } else if ( poElementName.equals( "MatchConsensus" ) ) {
230                sb.setLength( 0 );
231            } else if ( poElementName.equals( "HitSequence" ) ) {
232                oAlignment.oHitSeq.startPosition
233                    = poAtts.getValue( "startPosition" );
234                oAlignment.oHitSeq.stopPosition
235                    = poAtts.getValue( "stopPosition" );
236                sb.setLength( 0 );
237            }  else if ( poElementName.equals( "RawOutput" ) ) {
238                sb.setLength( 0 );
239            }
240
241        } // end inCollection
242
243    }
244
245    /**
246     * Called when the end of an element is reached.
247     *
248     * @param poNameSpace a <code>String</code> - the name space.
249     * @param poElementName a <code>String</code> - the local element name.
250     * @param poQName a <code>String</code> value - the qualified element name.
251     */
252    public void endElement ( String poNameSpace,
253                             String poElementName,
254                             String poQName ) {
255
256
257        if ( poElementName.equals( "Header" ) ) {
258            this.getQueryIdAndDatabase();
259            oRenderer.writeTitleAndHeader( oProgram, oVersion,
260                                      oQuery, oDatabase );
261
262        } else  if ( poElementName.equals( "HitDescription" ) ) {
263            oDesc.hitDescription = sb.substring(0);
264
265        } else  if ( poElementName.equals( "Summary" ) ) {
266            oRenderer.endSummaryTable();
267        } else  if ( poElementName.equals( "HitSummary" ) ) {
268            oRenderer.writeCurrentSummary( oHitSummary );
269        } else  if ( poElementName.equals( "Detail" ) ) {
270            oRenderer.endDetailTable();
271        } else  if ( poElementName.equals( "RawOutput" ) ) {
272            oRawOutput  = sb.substring(0);
273        } else  if ( poElementName.equals( "HSPSummary" ) ) {
274            oHSPSummary.rawOutput = oRawOutput;
275        } else if ( poElementName.equals( "QuerySequence" ) ) {
276            oAlignment.oQuerySeq.seq = sb.substring(0);
277        } else if ( poElementName.equals( "MatchConsensus" ) ) {
278            oAlignment.oConsensus = sb.substring(0);
279        } else if ( poElementName.equals( "HitSequence" ) ) {
280            oAlignment.oHitSeq.seq = sb.substring(0);
281        } else if ( poElementName.equals( "HSP" ) ) {
282            oRenderer.writeCurrentHSP( oHSPSummary, oAlignment );
283        } else  if ( poElementName.equals( "BlastLikeDataSetCollection" ) ) {
284            this.reInit();
285        }
286
287    } // end
288
289    /**
290     * Describe <code>characters</code> method here.
291     *
292     * @param charBuffer - character array containing data.
293     * @param start - the start position of relavent chars in passes array
294     * @param length - the stop position of relavent chars in passes array
295     */
296    public void characters( char[] charBuffer, int start, int length) {
297
298        // note this may be called more than once for a particular tag
299        // ie loaded in chunks.
300        // So the correct way to handle this stuff is to buffer contents
301        // and deal with buffer when endElement is called.
302
303        sb.append( charBuffer, start, length );
304    }
305
306    // ************************************************************ //
307    // ****                 Utility functions                  **** //
308    // ************************************************************ //
309
310    /**
311     * Re-initializes state, called between parsings.
312     */
313    void reInit() {
314        inCollection = false;
315        firstSummary = true;
316        firstDetail = true;
317        oRawOutput = null;
318        sb.setLength( 0 );
319    }
320
321
322    /**
323     * Parses out query and database id's from, the rawoutput.
324     *
325     * Changes in the Sax event generator may have made this redundant.
326     *
327     */
328    void getQueryIdAndDatabase() {
329
330        BufferedReader reader = new BufferedReader
331            ( new StringReader( sb.substring(0) ) );
332
333        String oLine;
334
335        try {
336            while (( oLine = reader.readLine() ) != null) {
337
338                if ( oLine.startsWith( "Query=" ) ) {
339
340                    int index = oLine.indexOf( "=" );
341                    oQuery = oLine.substring( index+1 ).trim();
342                    continue;
343                }
344                if ( oLine.startsWith( "Database:" ) ) {
345
346                    int index = oLine.indexOf( ":" );
347                    oDatabase = oLine.substring( index+1 ).trim();
348                    break;
349                }
350            }
351        }
352        catch ( IOException e ) {
353            printError( e );
354        }
355    }
356
357
358    /**
359     * Print an error to System.err
360     *
361     * @param e an <code>Exception</code>
362     */
363    void printError( Exception e ) {
364
365        System.out.println( e.getMessage() );
366        e.printStackTrace();
367    }
368
369
370
371} // end class
372
373// ************************************************************ //
374// ****        Simple holder classes to hold temporary     **** //
375// ****                 values  during parsing             **** //
376// ************************************************************ //
377
378
379class DetailHit {
380
381    public String sequenceLength;
382    public HitDescription oDesc;
383    public HitId  oHitId;
384
385}
386
/**
 * Holder for the text of a HitDescription element.
 */
class HitDescription {

    /** Character data collected between the start and end of the element. */
    String hitDescription;
}
391
392class HSP {
393
394    public HSPSummary oHSPSummary;
395    public BlastLikeAlignment oAlignment;
396}
397
/**
 * Holder for the values of one HSPSummary element. Apart from
 * rawOutput, each field receives the value of the element attribute
 * with the same name and is null when that attribute is absent.
 */
class HSPSummary {

    // Scores and statistics
    public String score;
    public String expectValue;
    public String pValue;
    public String sumPValues;

    // Size and composition of the alignment
    public String alignmentSize;
    public String numberOfIdentities;
    public String percentageIdentity;
    public String numberOfPositives;
    public String percentagePositives;
    public String numberOfGaps;

    // Strand and frame of the query and of the hit
    public String queryStrand;
    public String hitStrand;
    public String queryFrame;
    public String hitFrame;

    /** Text of a RawOutput element, assigned when the HSPSummary element ends. */
    public String rawOutput;
}
420
421class BlastLikeAlignment {
422
423    public Sequence oQuerySeq;
424    public Sequence oHitSeq;
425    public String   oConsensus;
426}
427
/**
 * Holder for one sequence of an alignment, filled from a
 * QuerySequence or HitSequence element.
 */
class Sequence {

    /** Value of the startPosition attribute. */
    public String startPosition;

    /** Value of the stopPosition attribute. */
    public String stopPosition;

    /** Text content of the element. */
    public String seq;
}
434
435
436class HitSummary {
437
438    public String score;
439    public String expectValue;
440    public HitId  oHitId;
441    public HitDescription oDesc;
442
443    public String numberOfHSPs;
444    public String numberOfContributingHSPs;
445    public String smallestSumProbability;
446    public String readingFrame;
447    public String numberOfDomains;
448
449}
450
/**
 * Holder for the attributes of a HitId element.
 */
class HitId {

    /** Value of the metaData attribute. */
    public String metaData;

    /** Value of the id attribute. */
    public String id;
}
456
457
458