001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.ws.hmmer;
022
023import net.sf.json.JSONArray;
024import net.sf.json.JSONObject;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029import java.io.*;
030import java.net.HttpURLConnection;
031import java.net.URL;
032import java.util.SortedSet;
033import java.util.TreeSet;
034
035
036/**
037 * Makes remote calls to the HMMER web service at the EBI web site and returns Pfam domain annotations for an input protein sequence.
038 *
039 * @author Andreas Prlic
040 * @since 3.0.3
041 */
042public class RemoteHmmerScan implements HmmerScan {
043
044        private static final Logger LOGGER = LoggerFactory.getLogger(RemoteHmmerScan.class);
045
046        public static final String HMMER_SERVICE = "https://www.ebi.ac.uk/Tools/hmmer/search/hmmscan";
047
048        public RemoteHmmerScan(){
049
050        }
051
052
053        @Override
054        public  SortedSet<HmmerResult> scan(ProteinSequence sequence) throws IOException {
055
056                URL url = new URL(HMMER_SERVICE);
057
058                return scan(sequence, url);
059
060        }
061
062        /**
063         * Scans a protein sequence for Pfam profile matches.
064         *
065         * @param sequence
066         * @param serviceLocation
067         * @return
068         * @throws IOException
069         */
070        public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation) throws IOException{
071
072                StringBuffer postContent = new StringBuffer();
073
074                postContent.append("hmmdb=pfam");
075
076
077                // by default hmmscan runs with the HMMER3 cut_ga parameter enabled, the "gathering threshold", which depends on
078                // the cutoffs defined in the underlying HMM files.
079                // to request a different cutoff by e-value this could be enabled:
080                //postContent.append("&E=1");
081
082
083                postContent.append("&seq=");
084                postContent.append(sequence.getSequenceAsString());
085
086
087                HttpURLConnection connection = (HttpURLConnection) serviceLocation.openConnection();
088                connection.setDoOutput(true);
089                connection.setDoInput(true);
090                connection.setConnectTimeout(15000); // 15 sec
091                connection.setInstanceFollowRedirects(false);
092                connection.setRequestMethod("POST");
093                connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
094
095                connection.setRequestProperty("Accept","application/json");
096
097                connection.setRequestProperty("Content-Length", "" +
098                                Integer.toString(postContent.toString().getBytes().length));
099
100                //Send request
101                DataOutputStream wr = new DataOutputStream (
102                                connection.getOutputStream ());
103                wr.write(postContent.toString().getBytes());
104                wr.flush ();
105                wr.close ();
106
107
108                //Now get the redirect URL
109                URL respUrl = new URL( connection.getHeaderField( "Location" ));
110
111                int responseCode = connection.getResponseCode();
112                if ( responseCode == 500){
113                        LOGGER.warn("Got 500 response code for URL {}. Response message: {}.", serviceLocation, connection.getResponseMessage());
114                }
115
116                HttpURLConnection connection2 = (HttpURLConnection) respUrl.openConnection();
117                connection2.setRequestMethod("GET");
118                connection2.setRequestProperty("Accept", "application/json");
119                connection2.setConnectTimeout(60000); // 1 minute
120
121                //Get the response
122                BufferedReader in = new BufferedReader(
123                                new InputStreamReader(
124                                                connection2.getInputStream()));
125
126                String inputLine;
127
128                StringBuffer result = new StringBuffer();
129                while ((inputLine = in.readLine()) != null) {
130                        result.append(inputLine);
131                }
132
133                in.close();
134
135                // process the response and build up a container for the data.
136
137                SortedSet<HmmerResult> results = new TreeSet<HmmerResult>();
138                try {
139                        JSONObject json =  JSONObject.fromObject(result.toString());
140
141                        JSONObject hmresults = json.getJSONObject("results");
142
143
144                        JSONArray hits = hmresults.getJSONArray("hits");
145
146                        for(int i =0 ; i < hits.size() ; i++){
147                                JSONObject hit = hits.getJSONObject(i);
148
149                                HmmerResult hmmResult = new HmmerResult();
150
151                                Object dclO = hit.get("dcl");
152                                Integer dcl = -1;
153                                if ( dclO instanceof Long){
154                                        Long dclL = (Long) dclO;
155                                        dcl = dclL.intValue();
156                                } else if ( dclO instanceof Integer){
157                                        dcl = (Integer) dclO;
158                                }
159
160
161                                hmmResult.setAcc((String)hit.get("acc"));
162                                hmmResult.setDcl(dcl);
163                                hmmResult.setDesc((String)hit.get("desc"));
164                                hmmResult.setEvalue(Float.parseFloat((String)hit.get("evalue")));
165                                hmmResult.setName((String)hit.get("name"));
166                                hmmResult.setNdom((Integer)hit.get("ndom"));
167                                hmmResult.setNreported((Integer)hit.get("nreported"));
168                                hmmResult.setPvalue((Double)hit.get("pvalue"));
169                                hmmResult.setScore(Float.parseFloat((String)hit.get("score")));
170
171                                JSONArray hmmdomains = hit.getJSONArray("domains");
172
173                                SortedSet<HmmerDomain> domains = new TreeSet<HmmerDomain>();
174                                for ( int j= 0 ; j < hmmdomains.size() ; j++){
175                                        JSONObject d = hmmdomains.getJSONObject(j);
176                                        Integer is_included = getInteger(d.get("is_included"));
177                                        if ( is_included == 0) {
178                                                continue;
179                                        }
180
181
182                                        // this filters out multiple hits to the same clan
183                                        Integer outcompeted = getInteger(d.get("outcompeted"));
184                                        if ( outcompeted != null && outcompeted == 1) {
185                                                continue;
186                                        }
187
188                                        Integer significant = getInteger(d.get("significant"));
189
190                                        if (  significant != 1) {
191                                                continue;
192                                        }
193
194                                        HmmerDomain dom = new HmmerDomain();
195                                        dom.setAliLenth((Integer)d.get("aliL"));
196                                        dom.setHmmAcc((String)d.get("alihmmacc"));
197                                        dom.setHmmDesc((String)d.get("alihmmdesc"));
198
199                                        dom.setHmmFrom(getInteger(d.get("alihmmfrom")));
200                                        dom.setHmmTo(getInteger(d.get("alihmmto")));
201                                        dom.setSimCount((Integer) d.get("aliSimCount"));
202                                        dom.setSqFrom(getInteger(d.get("alisqfrom")));
203                                        dom.setSqTo(getInteger(d.get("alisqto")));
204                                        dom.setHmmName((String)d.get("alihmmname"));
205                                        dom.setEvalue(Float.parseFloat((String)d.get("ievalue")));
206
207                                        domains.add(dom);
208
209
210                                }
211
212                                hmmResult.setDomains(domains);
213
214                                results.add(hmmResult);
215                        }
216                } catch (NumberFormatException e){
217                        LOGGER.warn("Could not parse number in Hmmer web service json response: {}", e.getMessage());
218                }
219
220                return results;
221
222        }
223
224
225        private Integer getInteger(Object object) {
226                if ( object instanceof Integer)
227                        return (Integer) object;
228                else if( object instanceof String)
229                        return Integer.parseInt((String) object);
230
231                return null;
232        }
233
234}