001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojavax.bio.alignment.blast;
022
023import java.io.BufferedReader;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.io.OutputStreamWriter;
028import java.net.MalformedURLException;
029import java.net.URL;
030import java.net.URLConnection;
031import java.util.HashMap;
032
033import org.biojava.bio.BioException;
034import org.biojavax.bio.alignment.RemotePairwiseAlignmentProperties;
035import org.biojavax.bio.alignment.RemotePairwiseAlignmentService;
036import org.biojavax.bio.alignment.RemotePairwiseAlignmentOutputProperties;
037import org.biojavax.bio.seq.RichSequence;
038
039/**
040 * RemoteQBlastService - A simple way of submitting BLAST request to the QBlast
041 * service at NCBI.
042 * 
043 * <p>
044 * NCBI provides a Blast server through a CGI-BIN interface. RemoteQBlastService
045 * simply encapsulates an access to it by giving users access to get/set methods
046 * to fix sequence, program and database as well as advanced options.
047 * </p>
048 * 
049 * <p>
050 * The philosophy behind RemoteQBlastService is to disconnect submission of
051 * Blast requests from collection of Blast results. This is done so to allow a
052 * user to submit multiple Blast requests while allowing recovery of the reports
053 * at a later time.
054 * </p>
055 * 
056 * <p>
057 * Presently, only blastall programs are accessible. blastpgp and megablast are
058 * high-priorities.
059 * </p>
060 * 
061 * @author Sylvain Foisy, Diploide BioIT
062 * @since 1.8
063 * 
064 */
065public class RemoteQBlastService implements RemotePairwiseAlignmentService {
066
067        private static String baseurl = "https://blast.ncbi.nlm.nih.gov/Blast.cgi";
068        private URL aUrl;
069        private URLConnection uConn;
070        private OutputStreamWriter fromQBlast;
071        private BufferedReader rd;
072        
073        private String email = "anonymous@biojava.org";
074        private String tool = "biojavax";
075         
076
077        private String seq = null;
078        private String prog = null;
079        private String db = null;
080//      private String advanced = null;
081
082        private String rid;
083        private long step;
084        private long start;
085        private HashMap<String, Long> holder = new HashMap<String, Long>();
086
087        /**
088         * The constructor for a QBlast service request.
089         * 
090         * @throws BioException
091         *             if the NCBI URL is unresponsive
092         * 
093         */
094        public RemoteQBlastService() throws BioException {
095                try {
096                        aUrl = new URL(baseurl);
097                        uConn = setQBlastServiceProperties(aUrl.openConnection());
098                }
099                /*
100                 * Needed but should never be thrown since the URL is static and known
101                 * to exist
102                 */
103                catch (MalformedURLException e) {
104                        throw new BioException(
105                                        "It looks like the URL for NCBI QBlast service is wrong.\n");
106                }
107                /*
108                 * Intercept if the program can't connect to QBlast service
109                 */
110                catch (IOException e) {
111                        throw new BioException(
112                                        "Impossible to connect to QBlast service at this time. Check your network connection.\n");
113                }
114        }
115
116        /**
117         * This class is the actual worker doing all the dirty stuff related to
118         * sending the Blast request to the CGI_BIN interface. It should never be
119         * used as is but any method wanting to send a Blast request should manage
120         * to use it by feeding it the right parameters.
121         * 
122         * @param str : a <code>String</code> representation of a sequence from either of the
123         *              three wrapper methods
124         * @param rpa :a <code>RemotePairwiseAlignmentProperties</code> object
125         * @return rid : the ID of this request on the NCBI QBlast server
126         * @throws BioException if unable to connect to the NCBI QBlast service
127         */
128        private String sendActualAlignementRequest(String str,
129                        RemotePairwiseAlignmentProperties rpa) throws BioException {
130
131                seq = "QUERY=" + str;
132                prog = "PROGRAM=" + rpa.getAlignmentOption("PROGRAM");
133                db = "DATABASE=" + rpa.getAlignmentOption("DATABASE");
134
135                if (prog == null || db == null || str == null || str.length() == 0) {
136                        throw new BioException(
137                                        "Impossible to execute QBlast request. One or more of sequence|database|program has not been set correctly.\n");
138                }
139
140                String cmd = "CMD=Put&SERVICE=plain" + "&" + seq + "&" + prog + "&"
141                                + db + "&" + "FORMAT_TYPE=HTML"+"&TOOL="+getTool()+"&EMAIL="+getEmail();
142
143                // This is a not so good hack to be fix by forcing key 
144                // checking in RemoteQBlastAlignmentProperties
145                if (rpa.getAlignmentOption("OTHER_ADVANCED") != null) {
146                        cmd += cmd + "&" + rpa.getAlignmentOption("OTHER_ADVANCED");
147                }
148
149                try {
150
151                        uConn = setQBlastServiceProperties(aUrl.openConnection());
152
153                        fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
154
155                        fromQBlast.write(cmd);
156                        fromQBlast.flush();
157
158                        // Get the response
159                        rd = new BufferedReader(new InputStreamReader(uConn
160                                        .getInputStream()));
161
162                        String line = "";
163
164                        while ((line = rd.readLine()) != null) {
165                                if (line.contains("RID")) {
166                                        String[] arr = line.split("=");
167                                        rid = arr[1].trim();
168                                } else if (line.contains("RTOE")) {
169                                        String[] arr = line.split("=");
170                                        step = Long.parseLong(arr[1].trim()) * 1000;
171                                        start = System.currentTimeMillis() + step;
172                                }
173                                holder.put(rid, start);
174                        }
175                } catch (IOException e) {
176                        throw new BioException(
177                                        "Can't submit sequence to BLAST server at this time.\n");
178                }
179
180                return rid;
181        }
182
183        /**
184         * This method is a wrapper that executes the Blast request via the Put
185         * command of the CGI-BIN interface with the specified parameters and a
186         * string representing the sequence. It gets the estimated time of
187         * completion by capturing the value of the RTOE variable and sets a loop
188         * that will check for completion of analysis at intervals specified by
189         * RTOE.
190         * 
191         * <p>
192         * It also capture the value for the RID variable, necessary for fetching
193         * the actual results after completion.
194         * </p>
195         * 
196         * @param str : a <code>String</code> with a sequence
197         * @param rpa : a <code>RemotePairwiseAlignmentProperties</code> object
198         * @return rid : a <code>String</code> with the request ID for this sequence
199         * @throws BioException if it is not possible to sent the BLAST command
200         */
201        public String sendAlignmentRequest(String str,
202                        RemotePairwiseAlignmentProperties rpa) throws BioException {
203
204                /*
205                 * sending the command to execute the Blast analysis
206                 */
207                return rid = sendActualAlignementRequest(seq, rpa);
208        }
209
210        /**
211         * This method is a wrapper that executes the Blast request via the Put
212         * command of the CGI-BIN interface with the specified parameters and a
213         * RichSequence. It gets the estimated time of completion by capturing the
214         * value of the RTOE variable and sets a loop that will check for completion
215         * of analysis at intervals specified by RTOE.
216         * 
217         * <p>
218         * It also capture the value for the RID variable, necessary for fetching
219         * the actual results after completion.
220         * </p>
221         * 
222         * @param rs :a <code>RichSequence</code> object
223         * @param rpa :a <code>RemotePairwiseAlignmentProperties</code> object
224         * @return rid : a <code>String</code> with the request ID for this sequence
225         * 
226         * @throws BioException if it is not possible to sent the BLAST command
227         */
228        public String sendAlignmentRequest(RichSequence rs,
229                        RemotePairwiseAlignmentProperties rpa) throws BioException {
230
231                seq = rs.seqString();
232
233                return rid = sendActualAlignementRequest(seq, rpa);
234        }
235
236        /**
237         * This method is a wrapper that executes the Blast request via the Put
238         * command of the CGI-BIN interface with the specified parameters and a
239         * GenBank GID. It gets the estimated time of completion by capturing the
240         * value of the RTOE variable and sets a loop that will check for completion
241         * of analysis at intervals specified by RTOE.
242         * 
243         * <p>
244         * It also capture the value for the RID variable, necessary for fetching
245         * the actual results after completion.
246         * </p>
247         * 
248         * @param gid :an integer with a Genbank GID
249         * @param rpa :a <code>RemotePairwiseAlignmentProperties</code> object
250         * @return rid : a String with the request ID for this sequence
251         * @throws BioException if it is not possible to sent the BLAST command
252         */
253        public String sendAlignmentRequest(int gid,
254                        RemotePairwiseAlignmentProperties rpa) throws BioException {
255
256                seq = Integer.toString(gid);
257                return rid = sendActualAlignementRequest(seq, rpa);
258        }
259
260        /**
261         * <p>
262         * This method is used only for the executeBlastSearch method to check for
263         * completion of request using the NCBI specified RTOE variable
264         * </p>
265         * 
266         * @param id : a valid request ID
267         * @param present : a representation of "now" using System.currentTimeMillis().
268         * @return a boolean value telling if the request has been completed or not.
269         */
270        public boolean isReady(String id, long present) throws BioException {
271                boolean ready = false;
272                String check = "CMD=Get&RID=" + id;
273
274                if (holder.containsKey(id)) {
275                        /*
276                         * If present time is less than the start of the search added to
277                         * step obtained from NCBI, just do nothing ;-)
278                         * 
279                         * This is done so that we do not send zillions of requests to the
280                         * server. We do the waiting internally first.
281                         */
282                        if (present < start) {
283                                ready = false;
284                        }
285                        /*
286                         * If we are at least step seconds in the future from the actual
287                         * call sendAlignementRequest()
288                         */
289                        else {
290                                try {
291                                        uConn = setQBlastServiceProperties(aUrl.openConnection());
292
293                                        fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
294                                        fromQBlast.write(check);
295                                        fromQBlast.flush();
296
297                                        rd = new BufferedReader(new InputStreamReader(uConn
298                                                        .getInputStream()));
299
300                                        String line = "";
301
302                                        while ((line = rd.readLine()) != null) {
303                                                if (line.contains("READY")) {
304                                                        ready = true;
305                                                } else if (line.contains("WAITING")) {
306                                                        /*
307                                                         * Else, move start forward in time... for the next
308                                                         * iteration
309                                                         */
310                                                        start = present + step;
311                                                        holder.put(id, start);
312                                                }
313                                        }
314                                } catch (IOException e) {
315                                        e.printStackTrace();
316                                }
317                        }
318                } else {
319                        throw new BioException("Impossible to check for request ID named "
320                                        + id + " because it does not exists!\n");
321                }
322                return ready;
323        }
324
325        /**
326         * <p>
327         * This method extracts the actual Blast report for this request ID. It uses
328         * an object implementing the RemotePairwiseAlignmentOutputProperties
329         * interface which will specify output formatting options.
330         * </p>
331         * 
332         * @param id :a valid request ID
333         * @param rb : a <code>RemotePairwiseAlignmentOutputProperties</code> that
334         *             will specify specific output formatting commands
335         * @return an <code>InputStream</code> that can be use any way one might
336         *         desire
337         * @throws BioException if it is not possible to recover the results.
338         */
339        public InputStream getAlignmentResults(String id,
340                        RemotePairwiseAlignmentOutputProperties rb) throws BioException {
341                if (holder.containsKey(id)) {
342                        String srid = "CMD=Get&RID=" + id + "&"
343                                        + rb.getOutputOption("FORMAT_TYPE") + "&"
344                                        + rb.getOutputOption("ALIGNMENT_VIEW") + "&"
345                                        + rb.getOutputOption("DESCRIPTIONS") + "&"
346                                        + rb.getOutputOption("ALIGNMENTS")
347                                        + "&TOOL="+getTool()+"&EMAIL="+getEmail();
348
349                        try {
350                                uConn = setQBlastServiceProperties(aUrl.openConnection());
351
352                                fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
353                                fromQBlast.write(srid);
354                                fromQBlast.flush();
355
356                                return uConn.getInputStream();
357
358                        } catch (IOException ioe) {
359                                throw new BioException(
360                                                "It is not possible to fetch Blast report from NCBI at this time.\n");
361                        }
362                } else {
363                        throw new BioException(
364                                        "Impossible to get output for request ID named " + id
365                                                        + " because it does not exists!\n");
366                }
367        }
368
369        /**
370         * A simple method to check the availability of the QBlast service
371         * 
372         * @throws BioException if unable to connect to the NCBI QBlast service
373         */
374        public void printRemoteBlastInfo() throws BioException {
375                try {
376                        OutputStreamWriter out = new OutputStreamWriter(uConn
377                                        .getOutputStream());
378
379                        out.write("CMD=Info");
380                        out.flush();
381
382                        // Get the response
383                        BufferedReader rd = new BufferedReader(new InputStreamReader(uConn
384                                        .getInputStream()));
385
386                        String line = "";
387
388                        while ((line = rd.readLine()) != null) {
389                                System.out.println(line);
390                        }
391
392                        out.close();
393                        rd.close();
394                } catch (IOException e) {
395                        throw new BioException(
396                                        "Impossible to get info from QBlast service at this time. Check your network connection.\n");
397                }
398        }
399
400        private URLConnection setQBlastServiceProperties(URLConnection conn) {
401
402                URLConnection tmp = conn;
403
404                conn.setDoOutput(true);
405                conn.setUseCaches(false);
406
407                tmp.setRequestProperty("User-Agent", "Biojava/RemoteQBlastService");
408                tmp.setRequestProperty("Connection", "Keep-Alive");
409                tmp.setRequestProperty("Content-type",
410                                "application/x-www-form-urlencoded");
411                tmp.setRequestProperty("Content-length", "200");
412
413                return tmp;
414        }
415    /** 
416     * Set the tool identifier for QBlast. Defaults to 'biojavax'.
417     * @param tool the new identifier.
418     */
419    public void setTool(String tool) {
420        this.tool = tool;
421    }
422
423    /** 
424     * Get the tool identifier for QBlast. Defaults to 'biojavax'.
425     * @return the identifier.
426     */
427    public String getTool() {
428        return this.tool;
429    }
430
431    /** 
432     * Set the email for QBlast. Defaults to 'anonymous@biojava.org'.
433     * @param email the new email.
434     */
435    public void setEmail(String email) {
436        this.email = email;
437    }
438
439    /** 
440     * Get the email for QBlast. Defaults to 'anonymous@biojava.org'.
441     * @return the email.
442     */
443    public String getEmail() {
444        return this.email;
445    }   
446}