001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.nbio.ws.alignment.qblast; 023 024import org.biojava.nbio.core.sequence.io.util.IOUtils; 025import org.biojava.nbio.core.sequence.template.Compound; 026import org.biojava.nbio.core.sequence.template.Sequence; 027import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentOutputProperties; 028import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentProperties; 029import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentService; 030 031import java.io.*; 032import java.net.MalformedURLException; 033import java.net.URL; 034import java.net.URLConnection; 035import java.util.HashMap; 036import java.util.Map; 037 038import static org.biojava.nbio.ws.alignment.qblast.BlastAlignmentParameterEnum.*; 039import static org.biojava.nbio.ws.alignment.qblast.BlastOutputParameterEnum.RID; 040 041/** 042 * Provides a simple way of submitting BLAST request to the QBlast service at NCBI. 043 * <p> 044 * NCBI provides a Blast server through a CGI-BIN interface. This service simply encapsulates an access to it by giving 045 * users access to get/set methods to fix sequence, program and database as well as advanced options. 046 * </p> 047 * <p> 048 * The philosophy behind this service is to disconnect submission of Blast requests from collection of Blast results. 049 * This is done so to allow a user to submit multiple Blast requests while allowing recovery of the reports at a later 050 * time. 051 * </p> 052 * <p> 053 * Presently, only blastall programs are accessible. 054 * </p> 055 * 056 * @author Sylvain Foisy, Diploide BioIT 057 * @author Gediminas Rimsa 058 */ 059public class NCBIQBlastService implements RemotePairwiseAlignmentService { 060 /** 061 * Number of milliseconds by which expected job execution time is incremented if it is not finished yet. Subsequent 062 * calls to {@link #isReady(String, long)} method will return false until at least this much time passes. 063 */ 064 public static final long WAIT_INCREMENT = 3000; 065 066 private static final MapToStringTransformer MAP_TO_STRING_TRANSFORMER = new MapToStringTransformer(); 067 private static final String SERVICE_URL = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"; 068 private static final String DEFAULT_EMAIL = "anonymous@biojava.org"; 069 private static final String DEFAULT_TOOL = "biojava5"; 070 071 private URL serviceUrl; 072 private String email = DEFAULT_EMAIL; 073 private String tool = DEFAULT_TOOL; 074 075 private Map<String, BlastJob> jobs = new HashMap<String, BlastJob>(); 076 077 /** Constructs a service object that targets the public NCBI BLAST network 078 * service. 079 */ 080 public NCBIQBlastService() { 081 init(SERVICE_URL); 082 } 083 084 /** Constructs a service object which targets a custom NCBI BLAST network 085 * service (e.g.: an instance of BLAST in the cloud). 086 * 087 * @param svcUrl : a {@code String} containing the base URL to send requests to, 088 * e.g.: http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi 089 * 090 * @see <a href="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast">BLAST on the cloud documentation</a> 091 */ 092 public NCBIQBlastService(String svcUrl) { 093 init(svcUrl); 094 } 095 096 /** Initialize the serviceUrl data member 097 * @throws MalformedURLException on invalid URL 098 */ 099 private void init(String svcUrl) { 100 try { 101 serviceUrl = new URL(svcUrl); 102 } catch (MalformedURLException e) { 103 throw new RuntimeException("It looks like the URL for remote NCBI BLAST service (" 104 + svcUrl + ") is wrong. Cause: " + e.getMessage(), e); 105 } 106 } 107 108 /** 109 * A simple method to check the availability of the QBlast service. Sends {@code Info} command to QBlast 110 * 111 * @return QBlast info output concatenated to String 112 * @throws Exception if unable to connect to the NCBI QBlast service 113 */ 114 public String getRemoteBlastInfo() throws Exception { 115 OutputStreamWriter writer = null; 116 BufferedReader reader = null; 117 try { 118 URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection()); 119 writer = new OutputStreamWriter(serviceConnection.getOutputStream()); 120 writer.write("CMD=Info"); 121 writer.flush(); 122 reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream())); 123 StringBuilder sb = new StringBuilder(); 124 String line; 125 while ((line = reader.readLine()) != null) { 126 sb.append(line); 127 sb.append(System.getProperty("line.separator")); 128 } 129 return sb.toString(); 130 } catch (IOException e) { 131 throw new Exception("Impossible to get info from QBlast service at this time. Cause: " + e.getMessage(), e); 132 } finally { 133 IOUtils.close(reader); 134 IOUtils.close(writer); 135 } 136 } 137 138 /** 139 * Converts given sequence to String and calls 140 * {@link #sendAlignmentRequest(String, RemotePairwiseAlignmentProperties)} 141 */ 142 @Override 143 public String sendAlignmentRequest(Sequence<Compound> seq, RemotePairwiseAlignmentProperties rpa) throws Exception { 144 return sendAlignmentRequest(seq.getSequenceAsString(), rpa); 145 } 146 147 /** 148 * Converts given GenBank GID to String and calls 149 * {@link #sendAlignmentRequest(String, RemotePairwiseAlignmentProperties)} 150 */ 151 public String sendAlignmentRequest(int gid, RemotePairwiseAlignmentProperties rpa) throws Exception { 152 return sendAlignmentRequest(Integer.toString(gid), rpa); 153 } 154 155 /** 156 * Sends the Blast request via the Put command of the CGI-BIN interface. Uses all of the parameters specified in 157 * {@code alignmentProperties} (parameters PROGRAM and DATABASE are required). 158 * 159 * @param query : a {@code String} representing a sequence or Genbank ID 160 * @param alignmentProperties : a {@code RemotePairwiseAlignmentProperties} object representing alignment properties 161 * @return the request id for this sequence, necessary to fetch results after completion 162 * @throws Exception if unable to connect to the NCBI QBlast service or if no sequence or required parameters 163 * PROGRAM and DATABASE are not set 164 */ 165 @Override 166 public String sendAlignmentRequest(String query, RemotePairwiseAlignmentProperties alignmentProperties) throws Exception { 167 Map<String, String> params = new HashMap<String, String>(); 168 for (String key : alignmentProperties.getAlignmentOptions()) { 169 params.put(key, alignmentProperties.getAlignmentOption(key)); 170 } 171 172 if (query == null || query.isEmpty()) { 173 throw new IllegalArgumentException("Impossible to execute QBlast request. The sequence has not been set."); 174 } 175 if (!params.containsKey(PROGRAM.name())) { 176 throw new IllegalArgumentException("Impossible to execute QBlast request. Parameter PROGRAM has not been set."); 177 } 178 if (!params.containsKey(DATABASE.name())) { 179 throw new IllegalArgumentException("Impossible to execute QBlast request. Parameter DATABASE has not been set."); 180 } 181 182 params.put(CMD.name(), "Put"); 183 params.put(QUERY.name(), query); 184 params.put(TOOL.name(), getTool()); 185 params.put(EMAIL.name(), getEmail()); 186 187 String putCmd = MAP_TO_STRING_TRANSFORMER.transform(params); 188 189 OutputStreamWriter writer = null; 190 BufferedReader reader = null; 191 try { 192 URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection()); 193 writer = new OutputStreamWriter(serviceConnection.getOutputStream()); 194 writer.write(putCmd); 195 writer.flush(); 196 197 // Get the response 198 reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream())); 199 200 // find request id and time of execution 201 BlastJob job = new BlastJob(); 202 String line; 203 while ((line = reader.readLine()) != null) { 204 if (!line.contains("class=\"error\"") && !line.contains("Message ID#")) { 205 // if there is no error, capture RID and RTOE 206 if (line.contains("RID = ")) { 207 String[] arr = line.split("="); 208 job.setId(arr[1].trim()); 209 } else if (line.contains("RTOE = ")) { 210 String[] arr = line.split("="); 211 job.setStartTimestamp(System.currentTimeMillis()); 212 job.setExpectedExecutionTime(Long.parseLong(arr[1].trim()) * 1000); 213 } 214 jobs.put(job.getId(), job); 215 } else { 216 // handle QBlast error message 217 218 // Capture everything to the left of this HTML statement... 219 String[] tmp = line.split("</p></li></ul>"); 220 221 // Only the error message is on the right side of this... 222 String[] moreTmp = tmp[0].split("<p class=\"error\">"); 223 throw new Exception("NCBI QBlast refused this request because: " + moreTmp[1].trim()); 224 } 225 226 } 227 if (job != null && job.getId() != null) { 228 return job.getId(); 229 } else { 230 throw new Exception("Unable to retrieve request ID"); 231 } 232 } catch (IOException e) { 233 throw new IOException("An error occured submiting sequence to BLAST server. Cause: " + e.getMessage(), e); 234 } finally { 235 IOUtils.close(reader); 236 IOUtils.close(writer); 237 } 238 } 239 240 /** 241 * Wrapper method for {@link #isReady(String, long)}, omitting unnecessary {@code present} property. 242 * 243 * @see #isReady(String, long) 244 */ 245 public boolean isReady(String id) throws Exception { 246 return isReady(id, 0); 247 } 248 249 /** 250 * Checks for completion of request. 251 * <p/> 252 * If expected execution time (RTOE) is available for request, this method will always return false until that time 253 * passes. This is done to prevent sending unnecessary requests to the server. 254 * 255 * @param id : request id, which was returned by {@code sendAlignmentRequest} method 256 * @param present : is not used, can be any value 257 * @return a boolean value telling if the request has been completed 258 */ 259 @Override 260 public boolean isReady(String id, long present) throws Exception { 261 BlastJob job = jobs.get(id); 262 if (job != null) { 263 long expectedJobFinishTime = job.getStartTimestamp() + job.getExpectedExecutionTime(); 264 if (System.currentTimeMillis() < expectedJobFinishTime) { 265 return false; 266 } 267 } else { 268 // it might be a valid job from another session 269 job = new BlastJob(); 270 job.setId(id); 271 job.setStartTimestamp(System.currentTimeMillis()); 272 job.setExpectedExecutionTime(0); 273 } 274 275 OutputStreamWriter writer = null; 276 BufferedReader reader = null; 277 try { 278 String checkRequest = "CMD=Get&RID=" + job.getId() + "&FORMAT_OBJECT=SearchInfo"; 279 URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection()); 280 writer = new OutputStreamWriter(serviceConnection.getOutputStream()); 281 writer.write(checkRequest); 282 writer.flush(); 283 reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream())); 284 285 String line; 286 while ((line = reader.readLine()) != null) { 287 if (line.contains("READY")) { 288 jobs.put(job.getId(), job); 289 return true; 290 } else if (line.contains("WAITING")) { 291 job.setExpectedExecutionTime(job.getExpectedExecutionTime() + WAIT_INCREMENT); 292 jobs.put(job.getId(), job); 293 return false; 294 } else if (line.contains("UNKNOWN")) { 295 throw new IllegalArgumentException("Unknown request id - no results exist for it. Given id = " + id); 296 } 297 } 298 return false; 299 } catch (IOException ioe) { 300 throw new Exception("It is not possible to fetch Blast report from NCBI at this time. Cause: " + ioe.getMessage(), ioe); 301 } finally { 302 IOUtils.close(reader); 303 IOUtils.close(writer); 304 } 305 } 306 307 /** 308 * Extracts the actual Blast report for given request id according to options provided in {@code outputProperties} 309 * argument. 310 * <p/> 311 * If the results are not ready yet, sleeps until they are available. If sleeping is not desired, call this method 312 * after {@code isReady} returns true 313 * 314 * @param id : request id, which was returned by {@code sendAlignmentRequest} method 315 * @param outputProperties : an object specifying output formatting options 316 * @return an {@code InputStream} of results 317 * @throws Exception if it is not possible to recover the results 318 */ 319 @Override 320 public InputStream getAlignmentResults(String id, RemotePairwiseAlignmentOutputProperties outputProperties) throws Exception { 321 Map<String, String> params = new HashMap<String, String>(); 322 for (String key : outputProperties.getOutputOptions()) { 323 params.put(key, outputProperties.getOutputOption(key)); 324 } 325 OutputStreamWriter writer = null; 326 327 while (!isReady(id)) { 328 Thread.sleep(WAIT_INCREMENT + 100); 329 } 330 331 params.put(CMD.name(), "Get"); 332 params.put(RID.name(), id); 333 params.put(TOOL.name(), getTool()); 334 params.put(EMAIL.name(), getEmail()); 335 String getCmd = MAP_TO_STRING_TRANSFORMER.transform(params); 336 337 try { 338 URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection()); 339 writer = new OutputStreamWriter(serviceConnection.getOutputStream()); 340 writer.write(getCmd); 341 writer.flush(); 342 return serviceConnection.getInputStream(); 343 } catch (IOException ioe) { 344 throw new Exception("It is not possible to fetch Blast report from NCBI at this time. Cause: " + ioe.getMessage(), ioe); 345 } finally { 346 IOUtils.close(writer); 347 } 348 } 349 350 /** 351 * Sends a delete request for given request id. Optional operation, ignores IOExceptions.<br/> 352 * Can be used after results of given search are no longer needed to be kept on Blast server 353 * 354 * @param id request id, as returned by {@code sendAlignmentRequest} method 355 */ 356 public void sendDeleteRequest(String id) { 357 OutputStreamWriter writer = null; 358 try { 359 String deleteRequest = "CMD=Delete&RID=" + id; 360 URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection()); 361 writer = new OutputStreamWriter(serviceConnection.getOutputStream()); 362 writer.write(deleteRequest); 363 writer.flush(); 364 } catch (IOException ignore) { 365 // ignore it this is an optional operation 366 } finally { 367 IOUtils.close(writer); 368 } 369 } 370 371 /** 372 * Sets properties for given URLConnection 373 * 374 * @param conn URLConnection to set properties for 375 * @return given object after setting properties 376 */ 377 private URLConnection setQBlastServiceProperties(URLConnection conn) { 378 conn.setDoOutput(true); 379 conn.setUseCaches(false); 380 conn.setRequestProperty("User-Agent", "Biojava/NCBIQBlastService"); 381 conn.setRequestProperty("Connection", "Keep-Alive"); 382 conn.setRequestProperty("Content-type", "application/x-www-form-urlencoded"); 383 conn.setRequestProperty("Content-length", "200"); 384 return conn; 385 } 386 387 /** 388 * Set the tool identifier for QBlast. Defaults to {@value #DEFAULT_TOOL} 389 * 390 * @param tool the new identifier 391 */ 392 public void setTool(String tool) { 393 this.tool = tool; 394 } 395 396 /** 397 * Get the tool identifier for QBlast. Defaults to {@value #DEFAULT_TOOL} 398 * 399 * @return the identifier 400 */ 401 public String getTool() { 402 return this.tool; 403 } 404 405 /** 406 * Set the email for QBlast. Defaults to {@value #DEFAULT_EMAIL} 407 * 408 * @param email the new email 409 */ 410 public void setEmail(String email) { 411 this.email = email; 412 } 413 414 /** 415 * Get the email for QBlast. Defaults to {@value #DEFAULT_EMAIL}. 416 * 417 * @return the email 418 */ 419 public String getEmail() { 420 return this.email; 421 } 422}