/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojavax.bio.alignment.blast;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;

import org.biojava.bio.BioException;
import org.biojavax.bio.alignment.RemotePairwiseAlignmentProperties;
import org.biojavax.bio.alignment.RemotePairwiseAlignmentService;
import org.biojavax.bio.alignment.RemotePairwiseAlignmentOutputProperties;
import org.biojavax.bio.seq.RichSequence;

/**
 * RemoteQBlastService - A simple way of submitting BLAST requests to the
 * QBlast service at NCBI.
 *
 * <p>
 * NCBI provides a Blast server through a CGI-BIN interface.
 * RemoteQBlastService simply encapsulates access to it by giving users
 * get/set methods to fix sequence, program and database as well as advanced
 * options.
 * </p>
 *
 * <p>
 * The philosophy behind RemoteQBlastService is to disconnect submission of
 * Blast requests from collection of Blast results. This is done to allow a
 * user to submit multiple Blast requests while allowing recovery of the
 * reports at a later time. Each submitted request is tracked by its request
 * ID together with the earliest time at which it is worth polling the server
 * for it.
 * </p>
 *
 * <p>
 * Presently, only blastall programs are accessible. blastpgp and megablast
 * are high-priorities.
 * </p>
 *
 * <p>
 * This class performs no synchronization of its own.
 * </p>
 *
 * @author Sylvain Foisy, Diploide BioIT
 * @since 1.8
 *
 */
public class RemoteQBlastService implements RemotePairwiseAlignmentService {

    private static final String BASE_URL = "http://blast.ncbi.nlm.nih.gov/Blast.cgi";

    private final URL aUrl;

    private String email = "anonymous@biojava.org";
    private String tool = "biojavax";

    /** Request ID -> earliest time (epoch milliseconds) at which to poll the server. */
    private final Map<String, Long> holder = new HashMap<String, Long>();

    /** Request ID -> polling interval in milliseconds, derived from the RTOE value. */
    private final Map<String, Long> steps = new HashMap<String, Long>();

    /**
     * The constructor for a QBlast service request.
     *
     * <p>
     * Only a <code>URLConnection</code> object is created here; per the
     * <code>java.net.URL</code> contract this does not establish the actual
     * network connection.
     * </p>
     *
     * @throws BioException
     *             if the service URL is malformed or a connection object
     *             cannot be created for it
     */
    public RemoteQBlastService() throws BioException {
        try {
            aUrl = new URL(BASE_URL);
            // Kept so that construction still fails early when no connection
            // object can be created for the service URL.
            setQBlastServiceProperties(aUrl.openConnection());
        }
        /*
         * Needed but should never be thrown since the URL is static and known
         * to exist
         */
        catch (MalformedURLException e) {
            throw new BioException(
                    "It looks like the URL for NCBI QBlast service is wrong.\n", e);
        }
        /*
         * Intercept if the program can't connect to QBlast service
         */
        catch (IOException e) {
            throw new BioException(
                    "Impossible to connect to QBlast service at this time. Check your network connection.\n",
                    e);
        }
    }

    /**
     * This method is the actual worker doing all the dirty stuff related to
     * sending the Blast request to the CGI-BIN interface. It should never be
     * used as is, but any method wanting to send a Blast request should
     * manage to use it by feeding it the right parameters.
     *
     * <p>
     * The response is scanned line by line: the value following <code>=</code>
     * on a line containing <code>RID</code> becomes the request ID, and the
     * value on a line containing <code>RTOE</code> (seconds) becomes the
     * polling interval. When several lines match, the last one wins.
     * </p>
     *
     * @param str : a <code>String</code> representation of a sequence from
     *            either of the three wrapper methods
     * @param rpa : a <code>RemotePairwiseAlignmentProperties</code> object
     * @return rid : the ID of this request on the NCBI QBlast server
     * @throws BioException if the sequence, program or database is missing,
     *             if unable to connect to the NCBI QBlast service, or if the
     *             response carries no request ID
     */
    private String sendActualAlignementRequest(String str,
            RemotePairwiseAlignmentProperties rpa) throws BioException {

        String program = rpa.getAlignmentOption("PROGRAM");
        String database = rpa.getAlignmentOption("DATABASE");

        // Validate the raw values: once prefixed with "PROGRAM=" / "DATABASE="
        // the strings can never be null.
        if (program == null || database == null || str == null
                || str.length() == 0) {
            throw new BioException(
                    "Impossible to execute QBlast request. One or more of sequence|database|program has not been set correctly.\n");
        }

        StringBuilder cmd = new StringBuilder("CMD=Put&SERVICE=plain");
        cmd.append("&QUERY=").append(str);
        cmd.append("&PROGRAM=").append(program);
        cmd.append("&DATABASE=").append(database);
        cmd.append("&FORMAT_TYPE=HTML");
        cmd.append("&TOOL=").append(getTool());
        cmd.append("&EMAIL=").append(getEmail());

        // This is a not so good hack to be fixed by forcing key
        // checking in RemoteQBlastAlignmentProperties
        String advanced = rpa.getAlignmentOption("OTHER_ADVANCED");
        if (advanced != null) {
            // Append only the extra options; the command itself must not be
            // repeated.
            cmd.append('&').append(advanced);
        }

        String requestId = null;
        long waitMillis = 0L;
        OutputStreamWriter toQBlast = null;
        BufferedReader fromQBlast = null;

        try {
            URLConnection conn = setQBlastServiceProperties(aUrl
                    .openConnection());

            toQBlast = new OutputStreamWriter(conn.getOutputStream());
            toQBlast.write(cmd.toString());
            toQBlast.flush();

            // Get the response
            fromQBlast = new BufferedReader(new InputStreamReader(conn
                    .getInputStream()));

            String line;
            while ((line = fromQBlast.readLine()) != null) {
                if (line.contains("RID")) {
                    String value = valueAfterEquals(line);
                    if (value != null) {
                        requestId = value;
                    }
                } else if (line.contains("RTOE")) {
                    String value = valueAfterEquals(line);
                    if (value != null) {
                        try {
                            waitMillis = Long.parseLong(value) * 1000L;
                        } catch (NumberFormatException ignored) {
                            // Not a numeric RTOE line; keep the previous
                            // estimate.
                        }
                    }
                }
            }
        } catch (IOException e) {
            throw new BioException(
                    "Can't submit sequence to BLAST server at this time.\n", e);
        } finally {
            closeQuietly(toQBlast);
            closeQuietly(fromQBlast);
        }

        if (requestId == null) {
            throw new BioException(
                    "Can't submit sequence to BLAST server at this time: no request ID found in the server response.\n");
        }

        // Register the request once, after the whole response has been read,
        // so that no entry is ever stored under a null key.
        holder.put(requestId, Long.valueOf(System.currentTimeMillis()
                + waitMillis));
        steps.put(requestId, Long.valueOf(waitMillis));

        return requestId;
    }

    /**
     * This method is a wrapper that executes the Blast request via the Put
     * command of the CGI-BIN interface with the specified parameters and a
     * string representing the sequence. It captures the value of the RTOE
     * variable (estimated time of completion) and records it so that
     * {@link #isReady(String, long)} does not poll the server before that
     * time.
     *
     * <p>
     * It also captures the value for the RID variable, necessary for fetching
     * the actual results after completion.
     * </p>
     *
     * @param str : a <code>String</code> with a sequence
     * @param rpa : a <code>RemotePairwiseAlignmentProperties</code> object
     * @return rid : a <code>String</code> with the request ID for this
     *         sequence
     * @throws BioException if it is not possible to send the BLAST command
     */
    public String sendAlignmentRequest(String str,
            RemotePairwiseAlignmentProperties rpa) throws BioException {

        /*
         * sending the command to execute the Blast analysis
         */
        return sendActualAlignementRequest(str, rpa);
    }

    /**
     * This method is a wrapper that executes the Blast request via the Put
     * command of the CGI-BIN interface with the specified parameters and a
     * RichSequence. It captures the value of the RTOE variable (estimated
     * time of completion) and records it so that
     * {@link #isReady(String, long)} does not poll the server before that
     * time.
     *
     * <p>
     * It also captures the value for the RID variable, necessary for fetching
     * the actual results after completion.
     * </p>
     *
     * @param rs : a <code>RichSequence</code> object
     * @param rpa : a <code>RemotePairwiseAlignmentProperties</code> object
     * @return rid : a <code>String</code> with the request ID for this
     *         sequence
     *
     * @throws BioException if it is not possible to send the BLAST command
     */
    public String sendAlignmentRequest(RichSequence rs,
            RemotePairwiseAlignmentProperties rpa) throws BioException {

        return sendActualAlignementRequest(rs.seqString(), rpa);
    }

    /**
     * This method is a wrapper that executes the Blast request via the Put
     * command of the CGI-BIN interface with the specified parameters and a
     * GenBank GID. It captures the value of the RTOE variable (estimated time
     * of completion) and records it so that {@link #isReady(String, long)}
     * does not poll the server before that time.
     *
     * <p>
     * It also captures the value for the RID variable, necessary for fetching
     * the actual results after completion.
     * </p>
     *
     * @param gid : an integer with a Genbank GID
     * @param rpa : a <code>RemotePairwiseAlignmentProperties</code> object
     * @return rid : a String with the request ID for this sequence
     * @throws BioException if it is not possible to send the BLAST command
     */
    public String sendAlignmentRequest(int gid,
            RemotePairwiseAlignmentProperties rpa) throws BioException {

        return sendActualAlignementRequest(Integer.toString(gid), rpa);
    }

    /**
     * <p>
     * This method checks for completion of a request. The server is only
     * contacted once <code>present</code> has reached the time recorded for
     * this request ID; before that, <code>false</code> is returned without
     * any network traffic. When the server response contains
     * <code>WAITING</code>, the recorded time for this request is moved
     * forward by its RTOE-derived interval.
     * </p>
     *
     * @param id : a valid request ID
     * @param present : a representation of "now" using
     *            System.currentTimeMillis().
     * @return a boolean value telling if the request has been completed or
     *         not.
     * @throws BioException if the request ID is unknown to this service
     *             object or if the server cannot be queried
     */
    public boolean isReady(String id, long present) throws BioException {
        if (!holder.containsKey(id)) {
            throw new BioException("Impossible to check for request ID named "
                    + id + " because it does not exists!\n");
        }

        /*
         * If present time is less than the start of the search added to step
         * obtained from NCBI, just do nothing ;-)
         *
         * This is done so that we do not send zillions of requests to the
         * server. We do the waiting internally first. The time is looked up
         * per request ID so that several pending requests do not interfere.
         */
        long notBefore = holder.get(id).longValue();
        if (present < notBefore) {
            return false;
        }

        Long interval = steps.get(id);
        long step = (interval == null) ? 0L : interval.longValue();

        String check = "CMD=Get&RID=" + id;
        boolean ready = false;
        OutputStreamWriter toQBlast = null;
        BufferedReader fromQBlast = null;

        try {
            URLConnection conn = setQBlastServiceProperties(aUrl
                    .openConnection());

            toQBlast = new OutputStreamWriter(conn.getOutputStream());
            toQBlast.write(check);
            toQBlast.flush();

            fromQBlast = new BufferedReader(new InputStreamReader(conn
                    .getInputStream()));

            String line;
            while ((line = fromQBlast.readLine()) != null) {
                if (line.contains("READY")) {
                    ready = true;
                } else if (line.contains("WAITING")) {
                    /*
                     * Else, move start forward in time... for the next
                     * iteration
                     */
                    holder.put(id, Long.valueOf(present + step));
                }
            }
        } catch (IOException e) {
            throw new BioException(
                    "Impossible to check the status of request ID named " + id
                            + " at this time.\n", e);
        } finally {
            closeQuietly(toQBlast);
            closeQuietly(fromQBlast);
        }

        return ready;
    }

    /**
     * <p>
     * This method extracts the actual Blast report for this request ID. It
     * uses an object implementing the RemotePairwiseAlignmentOutputProperties
     * interface which will specify output formatting options.
     * </p>
     *
     * @param id : a valid request ID
     * @param rb : a <code>RemotePairwiseAlignmentOutputProperties</code> that
     *            will specify specific output formatting commands
     * @return an <code>InputStream</code> that can be used any way one might
     *         desire; the caller is responsible for closing it
     * @throws BioException if it is not possible to recover the results.
     */
    public InputStream getAlignmentResults(String id,
            RemotePairwiseAlignmentOutputProperties rb) throws BioException {
        if (!holder.containsKey(id)) {
            throw new BioException(
                    "Impossible to get output for request ID named " + id
                            + " because it does not exists!\n");
        }

        String srid = "CMD=Get&RID=" + id + "&"
                + rb.getOutputOption("FORMAT_TYPE") + "&"
                + rb.getOutputOption("ALIGNMENT_VIEW") + "&"
                + rb.getOutputOption("DESCRIPTIONS") + "&"
                + rb.getOutputOption("ALIGNMENTS")
                + "&TOOL=" + getTool() + "&EMAIL=" + getEmail();

        try {
            URLConnection conn = setQBlastServiceProperties(aUrl
                    .openConnection());

            OutputStreamWriter toQBlast = new OutputStreamWriter(conn
                    .getOutputStream());
            try {
                toQBlast.write(srid);
                toQBlast.flush();
            } finally {
                closeQuietly(toQBlast);
            }

            return conn.getInputStream();

        } catch (IOException ioe) {
            throw new BioException(
                    "It is not possible to fetch Blast report from NCBI at this time.\n",
                    ioe);
        }
    }

    /**
     * A simple method to check the availability of the QBlast service. The
     * response to the <code>CMD=Info</code> command is printed to standard
     * output, one line at a time.
     *
     * @throws BioException if unable to connect to the NCBI QBlast service
     */
    public void printRemoteBlastInfo() throws BioException {
        OutputStreamWriter out = null;
        BufferedReader in = null;

        try {
            // A fresh connection is opened for every call: a URLConnection
            // that has already been read from cannot be written to again.
            URLConnection conn = setQBlastServiceProperties(aUrl
                    .openConnection());

            out = new OutputStreamWriter(conn.getOutputStream());
            out.write("CMD=Info");
            out.flush();

            // Get the response
            in = new BufferedReader(new InputStreamReader(conn
                    .getInputStream()));

            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            throw new BioException(
                    "Impossible to get info from QBlast service at this time. Check your network connection.\n",
                    e);
        } finally {
            closeQuietly(out);
            closeQuietly(in);
        }
    }

    /**
     * Configures a connection for posting a form-encoded command to QBlast.
     *
     * <p>
     * No Content-length header is set here: the request bodies have varying
     * lengths, so a fixed value would be wrong for most of them.
     * </p>
     *
     * @param conn the connection to configure
     * @return the same connection object, configured
     */
    private URLConnection setQBlastServiceProperties(URLConnection conn) {

        conn.setDoOutput(true);
        conn.setUseCaches(false);

        conn.setRequestProperty("User-Agent", "Biojava/RemoteQBlastService");
        conn.setRequestProperty("Connection", "Keep-Alive");
        conn.setRequestProperty("Content-type",
                "application/x-www-form-urlencoded");

        return conn;
    }

    /**
     * Extracts the trimmed text between the first and second <code>=</code>
     * of a response line.
     *
     * @param line a line of the server response
     * @return the trimmed value, or <code>null</code> if the line has nothing
     *         after its first <code>=</code>
     */
    private static String valueAfterEquals(String line) {
        String[] parts = line.split("=");
        return (parts.length > 1) ? parts[1].trim() : null;
    }

    /**
     * Closes a stream, ignoring a <code>null</code> argument and any failure
     * to close.
     *
     * @param stream the stream to close, may be <code>null</code>
     */
    private static void closeQuietly(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the exchange
            // itself has already succeeded or been reported.
        }
    }

    /**
     * Set the tool identifier for QBlast. Defaults to 'biojavax'.
     * @param tool the new identifier.
     */
    public void setTool(String tool) {
        this.tool = tool;
    }

    /**
     * Get the tool identifier for QBlast. Defaults to 'biojavax'.
     * @return the identifier.
     */
    public String getTool() {
        return this.tool;
    }

    /**
     * Set the email for QBlast. Defaults to 'anonymous@biojava.org'.
     * @param email the new email.
     */
    public void setEmail(String email) {
        this.email = email;
    }

    /**
     * Get the email for QBlast. Defaults to 'anonymous@biojava.org'.
     * @return the email.
     */
    public String getEmail() {
        return this.email;
    }
}