001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.ws.alignment.qblast;
023
import org.biojava.nbio.core.sequence.io.util.IOUtils;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentOutputProperties;
import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentProperties;
import org.biojava.nbio.ws.alignment.RemotePairwiseAlignmentService;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import static org.biojava.nbio.ws.alignment.qblast.BlastAlignmentParameterEnum.*;
import static org.biojava.nbio.ws.alignment.qblast.BlastOutputParameterEnum.RID;
040
041/**
042 * Provides a simple way of submitting BLAST request to the QBlast service at NCBI.
043 * <p>
044 * NCBI provides a Blast server through a CGI-BIN interface. This service simply encapsulates an access to it by giving
045 * users access to get/set methods to fix sequence, program and database as well as advanced options.
046 * </p>
047 * <p>
048 * The philosophy behind this service is to disconnect submission of Blast requests from collection of Blast results.
049 * This is done so to allow a user to submit multiple Blast requests while allowing recovery of the reports at a later
050 * time.
051 * </p>
052 * <p>
053 * Presently, only blastall programs are accessible.
054 * </p>
055 *
056 * @author Sylvain Foisy, Diploide BioIT
057 * @author Gediminas Rimsa
058 */
059public class NCBIQBlastService implements RemotePairwiseAlignmentService {
060        /**
061         * Number of milliseconds by which expected job execution time is incremented if it is not finished yet. Subsequent
062         * calls to {@link #isReady(String, long)} method will return false until at least this much time passes.
063         */
064        public static final long WAIT_INCREMENT = 3000;
065
066        private static final MapToStringTransformer MAP_TO_STRING_TRANSFORMER = new MapToStringTransformer();
067        private static final String SERVICE_URL = "https://blast.ncbi.nlm.nih.gov/Blast.cgi";
068        private static final String DEFAULT_EMAIL = "anonymous@biojava.org";
069        private static final String DEFAULT_TOOL = "biojava5";
070
071        private URL serviceUrl;
072        private String email = DEFAULT_EMAIL;
073        private String tool = DEFAULT_TOOL;
074
075        private Map<String, BlastJob> jobs = new HashMap<String, BlastJob>();
076
077    /** Constructs a service object that targets the public NCBI BLAST network
078     * service. 
079     */
080        public NCBIQBlastService() {
081        init(SERVICE_URL);
082        }
083
084    /** Constructs a service object which targets a custom NCBI BLAST network
085     * service (e.g.: an instance of BLAST in the cloud).
086     *
087         * @param svcUrl : a {@code String} containing the base URL to send requests to,
088     *                 e.g.: http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi
089     *
090     * @see <a href="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast">BLAST on the cloud documentation</a>
091     */
092        public NCBIQBlastService(String svcUrl) {
093        init(svcUrl);
094        }
095
096    /** Initialize the serviceUrl data member 
097     * @throws MalformedURLException on invalid URL
098     */
099    private void init(String svcUrl) {
100                try {
101                        serviceUrl = new URL(svcUrl);
102                } catch (MalformedURLException e) {
103            throw new RuntimeException("It looks like the URL for remote NCBI BLAST service (" 
104                                       + svcUrl + ") is wrong. Cause: " + e.getMessage(), e);
105                }
106    }
107
108        /**
109         * A simple method to check the availability of the QBlast service. Sends {@code Info} command to QBlast
110         *
111         * @return QBlast info output concatenated to String
112         * @throws Exception if unable to connect to the NCBI QBlast service
113         */
114        public String getRemoteBlastInfo() throws Exception {
115                OutputStreamWriter writer = null;
116                BufferedReader reader = null;
117                try {
118                        URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection());
119                        writer = new OutputStreamWriter(serviceConnection.getOutputStream());
120                        writer.write("CMD=Info");
121                        writer.flush();
122                        reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream()));
123                        StringBuilder sb = new StringBuilder();
124                        String line;
125                        while ((line = reader.readLine()) != null) {
126                                sb.append(line);
127                                sb.append(System.getProperty("line.separator"));
128                        }
129                        return sb.toString();
130                } catch (IOException e) {
131                        throw new Exception("Impossible to get info from QBlast service at this time. Cause: " + e.getMessage(), e);
132                } finally {
133                        IOUtils.close(reader);
134                        IOUtils.close(writer);
135                }
136        }
137
138        /**
139         * Converts given sequence to String and calls
140         * {@link #sendAlignmentRequest(String, RemotePairwiseAlignmentProperties)}
141         */
142        @Override
143        public String sendAlignmentRequest(Sequence<Compound> seq, RemotePairwiseAlignmentProperties rpa) throws Exception {
144                return sendAlignmentRequest(seq.getSequenceAsString(), rpa);
145        }
146
147        /**
148         * Converts given GenBank GID to String and calls
149         * {@link #sendAlignmentRequest(String, RemotePairwiseAlignmentProperties)}
150         */
151        public String sendAlignmentRequest(int gid, RemotePairwiseAlignmentProperties rpa) throws Exception {
152                return sendAlignmentRequest(Integer.toString(gid), rpa);
153        }
154
155        /**
156         * Sends the Blast request via the Put command of the CGI-BIN interface. Uses all of the parameters specified in
157         * {@code alignmentProperties} (parameters PROGRAM and DATABASE are required).
158         *
159         * @param query : a {@code String} representing a sequence or Genbank ID
160         * @param alignmentProperties : a {@code RemotePairwiseAlignmentProperties} object representing alignment properties
161         * @return the request id for this sequence, necessary to fetch results after completion
162         * @throws Exception if unable to connect to the NCBI QBlast service or if no sequence or required parameters
163         *             PROGRAM and DATABASE are not set
164         */
165        @Override
166        public String sendAlignmentRequest(String query, RemotePairwiseAlignmentProperties alignmentProperties) throws Exception {
167                Map<String, String> params = new HashMap<String, String>();
168                for (String key : alignmentProperties.getAlignmentOptions()) {
169                        params.put(key, alignmentProperties.getAlignmentOption(key));
170                }
171
172                if (query == null || query.isEmpty()) {
173                        throw new IllegalArgumentException("Impossible to execute QBlast request. The sequence has not been set.");
174                }
175                if (!params.containsKey(PROGRAM.name())) {
176                        throw new IllegalArgumentException("Impossible to execute QBlast request. Parameter PROGRAM has not been set.");
177                }
178                if (!params.containsKey(DATABASE.name())) {
179                        throw new IllegalArgumentException("Impossible to execute QBlast request. Parameter DATABASE has not been set.");
180                }
181
182                params.put(CMD.name(), "Put");
183                params.put(QUERY.name(), query);
184                params.put(TOOL.name(), getTool());
185                params.put(EMAIL.name(), getEmail());
186
187                String putCmd = MAP_TO_STRING_TRANSFORMER.transform(params);
188
189                OutputStreamWriter writer = null;
190                BufferedReader reader = null;
191                try {
192                        URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection());
193                        writer = new OutputStreamWriter(serviceConnection.getOutputStream());
194                        writer.write(putCmd);
195                        writer.flush();
196
197                        // Get the response
198                        reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream()));
199
200                        // find request id and time of execution
201                        BlastJob job = new BlastJob();
202                        String line;
203                        while ((line = reader.readLine()) != null) {
204                                if (!line.contains("class=\"error\"") && !line.contains("Message ID#")) {
205                                        // if there is no error, capture RID and RTOE
206                                        if (line.contains("RID = ")) {
207                                                String[] arr = line.split("=");
208                                                job.setId(arr[1].trim());
209                                        } else if (line.contains("RTOE = ")) {
210                                                String[] arr = line.split("=");
211                                                job.setStartTimestamp(System.currentTimeMillis());
212                                                job.setExpectedExecutionTime(Long.parseLong(arr[1].trim()) * 1000);
213                                        }
214                                        jobs.put(job.getId(), job);
215                                } else {
216                                        // handle QBlast error message
217
218                                        // Capture everything to the left of this HTML statement...
219                                        String[] tmp = line.split("</p></li></ul>");
220
221                                        // Only the error message is on the right side of this...
222                                        String[] moreTmp = tmp[0].split("<p class=\"error\">");
223                                        throw new Exception("NCBI QBlast refused this request because: " + moreTmp[1].trim());
224                                }
225
226                        }
227                        if (job != null && job.getId() != null) {
228                                return job.getId();
229                        } else {
230                                throw new Exception("Unable to retrieve request ID");
231                        }
232                } catch (IOException e) {
233                        throw new IOException("An error occured submiting sequence to BLAST server. Cause: " + e.getMessage(), e);
234                } finally {
235                        IOUtils.close(reader);
236                        IOUtils.close(writer);
237                }
238        }
239
240        /**
241         * Wrapper method for {@link #isReady(String, long)}, omitting unnecessary {@code present} property.
242         *
243         * @see #isReady(String, long)
244         */
245        public boolean isReady(String id) throws Exception {
246                return isReady(id, 0);
247        }
248
249        /**
250         * Checks for completion of request.
251         * <p/>
252         * If expected execution time (RTOE) is available for request, this method will always return false until that time
253         * passes. This is done to prevent sending unnecessary requests to the server.
254         *
255         * @param id : request id, which was returned by {@code sendAlignmentRequest} method
256         * @param present : is not used, can be any value
257         * @return a boolean value telling if the request has been completed
258         */
259        @Override
260        public boolean isReady(String id, long present) throws Exception {
261                BlastJob job = jobs.get(id);
262                if (job != null) {
263                        long expectedJobFinishTime = job.getStartTimestamp() + job.getExpectedExecutionTime();
264                        if (System.currentTimeMillis() < expectedJobFinishTime) {
265                                return false;
266                        }
267                } else {
268                        // it might be a valid job from another session
269                        job = new BlastJob();
270                        job.setId(id);
271                        job.setStartTimestamp(System.currentTimeMillis());
272                        job.setExpectedExecutionTime(0);
273                }
274
275                OutputStreamWriter writer = null;
276                BufferedReader reader = null;
277                try {
278                        String checkRequest = "CMD=Get&RID=" + job.getId() + "&FORMAT_OBJECT=SearchInfo";
279                        URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection());
280                        writer = new OutputStreamWriter(serviceConnection.getOutputStream());
281                        writer.write(checkRequest);
282                        writer.flush();
283                        reader = new BufferedReader(new InputStreamReader(serviceConnection.getInputStream()));
284
285                        String line;
286                        while ((line = reader.readLine()) != null) {
287                                if (line.contains("READY")) {
288                                        jobs.put(job.getId(), job);
289                                        return true;
290                                } else if (line.contains("WAITING")) {
291                                        job.setExpectedExecutionTime(job.getExpectedExecutionTime() + WAIT_INCREMENT);
292                                        jobs.put(job.getId(), job);
293                                        return false;
294                                } else if (line.contains("UNKNOWN")) {
295                                        throw new IllegalArgumentException("Unknown request id - no results exist for it. Given id = " + id);
296                                }
297                        }
298                        return false;
299                } catch (IOException ioe) {
300                        throw new Exception("It is not possible to fetch Blast report from NCBI at this time. Cause: " + ioe.getMessage(), ioe);
301                } finally {
302                        IOUtils.close(reader);
303                        IOUtils.close(writer);
304                }
305        }
306
307        /**
308         * Extracts the actual Blast report for given request id according to options provided in {@code outputProperties}
309         * argument.
310         * <p/>
311         * If the results are not ready yet, sleeps until they are available. If sleeping is not desired, call this method
312         * after {@code isReady} returns true
313         *
314         * @param id : request id, which was returned by {@code sendAlignmentRequest} method
315         * @param outputProperties : an object specifying output formatting options
316         * @return an {@code InputStream} of results
317         * @throws Exception if it is not possible to recover the results
318         */
319        @Override
320        public InputStream getAlignmentResults(String id, RemotePairwiseAlignmentOutputProperties outputProperties) throws Exception {
321                Map<String, String> params = new HashMap<String, String>();
322                for (String key : outputProperties.getOutputOptions()) {
323                        params.put(key, outputProperties.getOutputOption(key));
324                }
325                OutputStreamWriter writer = null;
326
327                while (!isReady(id)) {
328                        Thread.sleep(WAIT_INCREMENT + 100);
329                }
330
331                params.put(CMD.name(), "Get");
332                params.put(RID.name(), id);
333                params.put(TOOL.name(), getTool());
334                params.put(EMAIL.name(), getEmail());
335                String getCmd = MAP_TO_STRING_TRANSFORMER.transform(params);
336
337                try {
338                        URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection());
339                        writer = new OutputStreamWriter(serviceConnection.getOutputStream());
340                        writer.write(getCmd);
341                        writer.flush();
342                        return serviceConnection.getInputStream();
343                } catch (IOException ioe) {
344                        throw new Exception("It is not possible to fetch Blast report from NCBI at this time. Cause: " + ioe.getMessage(), ioe);
345                } finally {
346                        IOUtils.close(writer);
347                }
348        }
349
350        /**
351         * Sends a delete request for given request id. Optional operation, ignores IOExceptions.<br/>
352         * Can be used after results of given search are no longer needed to be kept on Blast server
353         *
354         * @param id request id, as returned by {@code sendAlignmentRequest} method
355         */
356        public void sendDeleteRequest(String id) {
357                OutputStreamWriter writer = null;
358                try {
359                        String deleteRequest = "CMD=Delete&RID=" + id;
360                        URLConnection serviceConnection = setQBlastServiceProperties(serviceUrl.openConnection());
361                        writer = new OutputStreamWriter(serviceConnection.getOutputStream());
362                        writer.write(deleteRequest);
363                        writer.flush();
364                } catch (IOException ignore) {
365                        // ignore it this is an optional operation
366                } finally {
367                        IOUtils.close(writer);
368                }
369        }
370
371        /**
372         * Sets properties for given URLConnection
373         *
374         * @param conn URLConnection to set properties for
375         * @return given object after setting properties
376         */
377        private URLConnection setQBlastServiceProperties(URLConnection conn) {
378                conn.setDoOutput(true);
379                conn.setUseCaches(false);
380                conn.setRequestProperty("User-Agent", "Biojava/NCBIQBlastService");
381                conn.setRequestProperty("Connection", "Keep-Alive");
382                conn.setRequestProperty("Content-type", "application/x-www-form-urlencoded");
383                conn.setRequestProperty("Content-length", "200");
384                return conn;
385        }
386
387        /**
388         * Set the tool identifier for QBlast. Defaults to {@value #DEFAULT_TOOL}
389         *
390         * @param tool the new identifier
391         */
392        public void setTool(String tool) {
393                this.tool = tool;
394        }
395
396        /**
397         * Get the tool identifier for QBlast. Defaults to {@value #DEFAULT_TOOL}
398         *
399         * @return the identifier
400         */
401        public String getTool() {
402                return this.tool;
403        }
404
405        /**
406         * Set the email for QBlast. Defaults to {@value #DEFAULT_EMAIL}
407         *
408         * @param email the new email
409         */
410        public void setEmail(String email) {
411                this.email = email;
412        }
413
414        /**
415         * Get the email for QBlast. Defaults to {@value #DEFAULT_EMAIL}.
416         *
417         * @return the email
418         */
419        public String getEmail() {
420                return this.email;
421        }
422}