001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.bio.program.blast2html; 022 023import java.io.BufferedReader; 024import java.io.IOException; 025import java.io.StringReader; 026 027import org.xml.sax.Attributes; 028import org.xml.sax.SAXException; 029import org.xml.sax.helpers.DefaultHandler; 030 031/** 032 * Takes a SAX event stream and a HTMLRenderer to produce 033 * a HTML Blast like program report. 034 * 035 * 036 * Primary author - 037 * Colin Hardman (CAT) 038 * Other authors - 039 * Tim Dilks (CAT) 040 * Simon Brocklehurst (CAT) 041 * Stuart Johnston (CAT) 042 * Lawerence Bower (CAT) 043 * Derek Crockford (CAT) 044 * Neil Benn (CAT) 045 * 046 * Copyright 2001 Cambridge Antibody Technology Group plc. 047 * 048 * This code released to the biojava project, May 2001 049 * under the LGPL license. 050 * 051 * @author Cambridge Antibody Technology Group plc 052 * @author Greg Cox 053 * @version 1.0 054 */ 055public class Blast2HTMLHandler extends DefaultHandler { 056 057 /** 058 * Variables to hold data while parsing. 059 * 060 */ 061 private StringBuffer sb = new StringBuffer(); 062 063 private String oProgram; 064 private String oVersion; 065 private String oQuery; 066 private String oDatabase; 067 068 private HitSummary oHitSummary = new HitSummary(); 069 private HitId oHitId = new HitId(); 070 private HitDescription oDesc = new HitDescription(); 071 { 072 oHitSummary.oHitId = oHitId; 073 oHitSummary.oDesc = oDesc; 074 } 075 076 private DetailHit oDetailHit = new DetailHit(); 077 private HSP oHSP = new HSP(); 078 private HSPSummary oHSPSummary = new HSPSummary(); 079 private BlastLikeAlignment oAlignment = new BlastLikeAlignment(); 080 private Sequence oQuerySeq = new Sequence(); 081 private Sequence oHitSeq = new Sequence(); 082 083 { 084 oDetailHit.oHitId = oHitId; 085 oDetailHit.oDesc = oDesc; 086 oHSP.oHSPSummary = oHSPSummary; 087 oHSP.oAlignment = oAlignment; 088 oAlignment.oQuerySeq = oQuerySeq; 089 oAlignment.oHitSeq = oHitSeq; 090 091 } 092 093 private String oRawOutput = null; 094 095 // Flow control flags 096 private boolean inCollection = false; 097 private boolean firstSummary = true; 098 private boolean firstDetail = true; 099 100 /** 101 * The Class to render the HTML 102 */ 103 private HTMLRenderer oRenderer = null; 104 105 106 107 /** 108 * A content handler for rendering blast like outputs into 109 * HTML. 110 * 111 * @param poRenderer <code>HTMLRenderer</code> - a configured 112 * HTMLRenderer. 113 */ 114 public Blast2HTMLHandler( HTMLRenderer poRenderer ) { 115 116 if ( poRenderer == null ) { 117 throw new IllegalArgumentException 118 ( "HTMLRenderer cannot be null" ); 119 } 120 oRenderer = poRenderer; 121 } 122 123 124 125 // ************************************************************ // 126 // **** ContentHandler overrides **** // 127 // ************************************************************ // 128 129 /** 130 * This is called when an element is entered. That is, 131 * the parser has met the first tag of the tag pair. 132 * 133 * @param poNameSpace <code>String</code> - the name space. 134 * @param poElementName <code>String</code> - the local name of the tag. 135 * @param poQName <code>String</code> - the fully qualified name 136 * with prefix 137 * @param poAtts an <code>Attributes</code> - the tag attributes. 138 * @exception SAXException if an error occurs 139 */ 140 public void startElement ( String poNameSpace, String poElementName, 141 String poQName, Attributes poAtts) 142 throws SAXException { 143 144 if ( poElementName.equals( "BlastLikeDataSetCollection" ) ) { 145 inCollection = true; // only checking to do - assume once 146 // inside collection it follows DTD 147 } 148 149 if ( inCollection ) { 150 151 if ( poElementName.equals( "BlastLikeDataSet" ) ) { 152 153 oProgram = poAtts.getValue( "program" ); 154 oVersion = poAtts.getValue( "version" ); 155 sb.setLength( 0 ); 156 } else if ( poElementName.equals( "HitSummary" ) ) { 157 158 oHitSummary.score = poAtts.getValue( "score" ); 159 oHitSummary.expectValue = poAtts.getValue( "expectValue" ); 160 161 oHitSummary.numberOfHSPs = poAtts.getValue( "numberOfHSPs" ); 162 oHitSummary.numberOfContributingHSPs = poAtts.getValue 163 ( "numberOfContributingHSPs" ); 164 oHitSummary.smallestSumProbability = poAtts.getValue 165 ( "smallestSumProbability" ); 166 oHitSummary.readingFrame = poAtts.getValue( "readingFrame" ); 167 oHitSummary.numberOfDomains = poAtts.getValue 168 ( "numberOfDomains" ); 169 170 if ( firstSummary ) { 171 oRenderer.startSummaryTable( oHitSummary ); 172 firstSummary = false; 173 } 174 175 // 176 // WHAT happens if there is more than one HSPCollection 177 // per hit - probably won't work. 178 // 179 // 180 } else if ( poElementName.equals( "HSPCollection" ) ) { 181 oRenderer.writeCurrentDetail( oDetailHit ); 182 } else if ( poElementName.equals( "Hit" ) ) { 183 184 if ( firstDetail ) { 185 oRenderer.startDetailTable(); 186 firstDetail = false; 187 } 188 189 oDetailHit.sequenceLength = poAtts.getValue 190 ( "sequenceLength" ); 191 192 } else if ( poElementName.equals( "HSPSummary" ) ) { 193 194 oHSPSummary.percentageIdentity = poAtts.getValue 195 ( "percentageIdentity" ); 196 oHSPSummary.score = poAtts.getValue( "score" ); 197 oHSPSummary.expectValue = poAtts.getValue( "expectValue" ); 198 oHSPSummary.alignmentSize = poAtts.getValue 199 ( "alignmentSize" ); 200 oHSPSummary.numberOfIdentities = poAtts.getValue 201 ( "numberOfIdentities" ); 202 203 oHSPSummary.hitStrand = poAtts.getValue( "hitStrand" ); 204 oHSPSummary.queryStrand = poAtts.getValue( "queryStrand" ); 205 oHSPSummary.queryFrame = poAtts.getValue( "queryFrame" ); 206 oHSPSummary.hitFrame = poAtts.getValue( "hitFrame" ); 207 208 oHSPSummary.numberOfPositives = poAtts.getValue 209 ( "numberOfPositives" ); 210 oHSPSummary.percentagePositives= poAtts.getValue 211 ( "percentagePositives" ); 212 oHSPSummary.pValue = poAtts.getValue( "pValue" ); 213 oHSPSummary.sumPValues = poAtts.getValue( "sumPValues" ); 214 oHSPSummary.numberOfGaps = poAtts.getValue( "numberOfGaps"); 215 216 217 } else if ( poElementName.equals( "HitId" ) ) { 218 oHitId.id = poAtts.getValue( "id" ); 219 oHitId.metaData = poAtts.getValue( "metaData" ); 220 221 } else if ( poElementName.equals( "HitDescription" ) ) { 222 sb.setLength( 0 ); 223 } else if ( poElementName.equals( "QuerySequence" ) ) { 224 oAlignment.oQuerySeq.startPosition = 225 poAtts.getValue( "startPosition" ); 226 oAlignment.oQuerySeq.stopPosition = 227 poAtts.getValue( "stopPosition" ); 228 sb.setLength( 0 ); 229 } else if ( poElementName.equals( "MatchConsensus" ) ) { 230 sb.setLength( 0 ); 231 } else if ( poElementName.equals( "HitSequence" ) ) { 232 oAlignment.oHitSeq.startPosition 233 = poAtts.getValue( "startPosition" ); 234 oAlignment.oHitSeq.stopPosition 235 = poAtts.getValue( "stopPosition" ); 236 sb.setLength( 0 ); 237 } else if ( poElementName.equals( "RawOutput" ) ) { 238 sb.setLength( 0 ); 239 } 240 241 } // end inCollection 242 243 } 244 245 /** 246 * Called when the end of an element is reached. 247 * 248 * @param poNameSpace a <code>String</code> - the name space. 249 * @param poElementName a <code>String</code> - the local element name. 250 * @param poQName a <code>String</code> value - the qualified element name. 251 */ 252 public void endElement ( String poNameSpace, 253 String poElementName, 254 String poQName ) { 255 256 257 if ( poElementName.equals( "Header" ) ) { 258 this.getQueryIdAndDatabase(); 259 oRenderer.writeTitleAndHeader( oProgram, oVersion, 260 oQuery, oDatabase ); 261 262 } else if ( poElementName.equals( "HitDescription" ) ) { 263 oDesc.hitDescription = sb.substring(0); 264 265 } else if ( poElementName.equals( "Summary" ) ) { 266 oRenderer.endSummaryTable(); 267 } else if ( poElementName.equals( "HitSummary" ) ) { 268 oRenderer.writeCurrentSummary( oHitSummary ); 269 } else if ( poElementName.equals( "Detail" ) ) { 270 oRenderer.endDetailTable(); 271 } else if ( poElementName.equals( "RawOutput" ) ) { 272 oRawOutput = sb.substring(0); 273 } else if ( poElementName.equals( "HSPSummary" ) ) { 274 oHSPSummary.rawOutput = oRawOutput; 275 } else if ( poElementName.equals( "QuerySequence" ) ) { 276 oAlignment.oQuerySeq.seq = sb.substring(0); 277 } else if ( poElementName.equals( "MatchConsensus" ) ) { 278 oAlignment.oConsensus = sb.substring(0); 279 } else if ( poElementName.equals( "HitSequence" ) ) { 280 oAlignment.oHitSeq.seq = sb.substring(0); 281 } else if ( poElementName.equals( "HSP" ) ) { 282 oRenderer.writeCurrentHSP( oHSPSummary, oAlignment ); 283 } else if ( poElementName.equals( "BlastLikeDataSetCollection" ) ) { 284 this.reInit(); 285 } 286 287 } // end 288 289 /** 290 * Describe <code>characters</code> method here. 291 * 292 * @param charBuffer - character array containing data. 293 * @param start - the start position of relavent chars in passes array 294 * @param length - the stop position of relavent chars in passes array 295 */ 296 public void characters( char[] charBuffer, int start, int length) { 297 298 // note this may be called more than once for a particular tag 299 // ie loaded in chunks. 300 // So the correct way to handle this stuff is to buffer contents 301 // and deal with buffer when endElement is called. 302 303 sb.append( charBuffer, start, length ); 304 } 305 306 // ************************************************************ // 307 // **** Utility functions **** // 308 // ************************************************************ // 309 310 /** 311 * Re-initializes state, called between parsings. 312 */ 313 void reInit() { 314 inCollection = false; 315 firstSummary = true; 316 firstDetail = true; 317 oRawOutput = null; 318 sb.setLength( 0 ); 319 } 320 321 322 /** 323 * Parses out query and database id's from, the rawoutput. 324 * 325 * Changes in the Sax event generator may have made this redundant. 326 * 327 */ 328 void getQueryIdAndDatabase() { 329 330 BufferedReader reader = new BufferedReader 331 ( new StringReader( sb.substring(0) ) ); 332 333 String oLine; 334 335 try { 336 while (( oLine = reader.readLine() ) != null) { 337 338 if ( oLine.startsWith( "Query=" ) ) { 339 340 int index = oLine.indexOf( "=" ); 341 oQuery = oLine.substring( index+1 ).trim(); 342 continue; 343 } 344 if ( oLine.startsWith( "Database:" ) ) { 345 346 int index = oLine.indexOf( ":" ); 347 oDatabase = oLine.substring( index+1 ).trim(); 348 break; 349 } 350 } 351 } 352 catch ( IOException e ) { 353 printError( e ); 354 } 355 } 356 357 358 /** 359 * Print an error to System.err 360 * 361 * @param e an <code>Exception</code> 362 */ 363 void printError( Exception e ) { 364 365 System.out.println( e.getMessage() ); 366 e.printStackTrace(); 367 } 368 369 370 371} // end class 372 373// ************************************************************ // 374// **** Simple holder classes to hold temporary **** // 375// **** values during parsing **** // 376// ************************************************************ // 377 378 379class DetailHit { 380 381 public String sequenceLength; 382 public HitDescription oDesc; 383 public HitId oHitId; 384 385} 386 387class HitDescription { 388 389 String hitDescription; 390} 391 392class HSP { 393 394 public HSPSummary oHSPSummary; 395 public BlastLikeAlignment oAlignment; 396} 397 398class HSPSummary { 399 400 public String score; 401 public String expectValue; 402 public String numberOfIdentities; 403 public String alignmentSize; 404 public String percentageIdentity; 405 406 public String hitStrand; 407 public String queryStrand; 408 409 public String queryFrame; 410 public String hitFrame; 411 412 public String numberOfPositives; 413 public String percentagePositives; 414 public String pValue; 415 public String sumPValues; 416 public String numberOfGaps; 417 418 public String rawOutput; 419} 420 421class BlastLikeAlignment { 422 423 public Sequence oQuerySeq; 424 public Sequence oHitSeq; 425 public String oConsensus; 426} 427 428class Sequence { 429 430 public String seq; 431 public String startPosition; 432 public String stopPosition; 433} 434 435 436class HitSummary { 437 438 public String score; 439 public String expectValue; 440 public HitId oHitId; 441 public HitDescription oDesc; 442 443 public String numberOfHSPs; 444 public String numberOfContributingHSPs; 445 public String smallestSumProbability; 446 public String readingFrame; 447 public String numberOfDomains; 448 449} 450 451class HitId { 452 453 public String id; 454 public String metaData; 455} 456 457 458